/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
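/* A language front end may install the hook above; when non-NULL it is
   presumably invoked while the file preamble is being emitted, letting
   the front end contribute language-specific EABI object attributes.  */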
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
                                        unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                             const unsigned char *sel);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
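/* Illustrative (hypothetical) user code selecting handlers from the
   table above:

     void handler (void) __attribute__ ((isr ("IRQ")));
     extern int far_func (int) __attribute__ ((long_call));

   The string argument to "isr" is checked later via arm_isr_value.  */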
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif
#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef  TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok
struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
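/* For example, with the default of 5 a branch around a block of up to
   five instructions can be replaced by conditionally executed
   instructions, while longer blocks keep the branch.  */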
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
/* spare              (1 << 11) */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                         | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
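/* Unified assembler syntax spells a logical shift left "lsl"; the old
   divided syntax used the "asl" mnemonic for the same operation.  */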
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
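/* In other words: take the low registers r0-r7 (mask 0xff) and remove
   the Thumb hard frame pointer plus, where their register numbers fall
   in that range, the stack pointer, program counter and PIC register,
   leaving only registers safe to use as scratch.  */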
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};


#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  3,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                                            /* Constant limit.  */
  3,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  1,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_cortex_a5_branch_cost
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
    FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU we select.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};


/* These are populated as command-line arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
#undef ARM_FPU
};


/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ   /* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
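/* For example, bit_count (0x29) returns 3: the loop above clears one set
   bit per iteration (0x29 -> 0x28 -> 0x20 -> 0), so it runs exactly once
   for each bit that is set.  */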
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
                             const char *funcname, const char *modename,
                             int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
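/* For instance, registering add_optab for QQmode with FUNCNAME "add",
   MODENAME "qq" and NUM_SUFFIX 3 points the optab at the libgcc routine
   "__gnu_addqq3".  */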
static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
                            enum machine_mode from, const char *funcname,
                            const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
           maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
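/* Hence a QQmode-to-HQmode fract conversion (both fixed-point, same
   signedness and fract-ness) gets the "2" suffix: "__gnu_fractqqhq2";
   a QQmode-to-SFmode conversion does not: "__gnu_fractqqsf".  */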
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     AAPCS-based ABI.  */
  if (!TARGET_AAPCS_BASED)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
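  /* Per the Run-Time ABI, the two three-way compares above return a
     value that is less than, equal to or greater than zero according to
     the ordering of their operands.  */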
  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
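      /* Leaving these optabs NULL makes the compiler expand each HFmode
         operation as a widening to SFmode (via the __gnu_h2f_* helper),
         the SFmode operation itself, and a narrowing back through the
         matching __gnu_f2h_* routine.  */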
      break;

    default:
      break;
    }

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */

  const arm_fixed_mode_set fixed_arith_modes[] =

  const arm_fixed_mode_set fixed_conv_modes[] =

  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
                                   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
                                   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
                                   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
                                   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
                                   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
                                   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
                                   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
                                   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
                                   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
                                   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
                                   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
                                   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
                                   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
                                   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
                                   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
                                   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
                                   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
                                   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
                                   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
                                   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
                                   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
                                   "cmp", fixed_arith_modes[i].name, 2);
    }

  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
        if (i == j
            || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
                && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
          continue;

        arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
                                    fixed_conv_modes[j].mode, "fract",
                                    fixed_conv_modes[i].name,
                                    fixed_conv_modes[j].name);
        arm_set_fixed_conv_libfunc (satfract_optab,
                                    fixed_conv_modes[i].mode,
                                    fixed_conv_modes[j].mode, "satfract",
                                    fixed_conv_modes[i].name,
                                    fixed_conv_modes[j].name);
        arm_set_fixed_conv_libfunc (fractuns_optab,
                                    fixed_conv_modes[i].mode,
                                    fixed_conv_modes[j].mode, "fractuns",
                                    fixed_conv_modes[i].name,
                                    fixed_conv_modes[j].name);
        arm_set_fixed_conv_libfunc (satfractuns_optab,
                                    fixed_conv_modes[i].mode,
                                    fixed_conv_modes[j].mode, "satfractuns",
                                    fixed_conv_modes[i].name,
                                    fixed_conv_modes[j].name);
      }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
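/* The net effect matches the AAPCS requirement quoted above: on such
   targets __builtin_va_list behaves as

     typedef struct __va_list { void *__ap; } va_list;

   with __ap the only member.  */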
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  if (TARGET_LITTLE_WORDS)
    warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
             "will be removed in a future release");
1701 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1702 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1703 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1704 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1705 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1706 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1707 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1708 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1709 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1710 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1711 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1712 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1713 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1715 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1716 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1717 thumb_code = TARGET_ARM == 0;
1718 thumb1_code = TARGET_THUMB1 != 0;
1719 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1720 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1721 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1722 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1723 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1724 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1725 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1727 /* If we are not using the default (ARM mode) section anchor offset
1728 ranges, then set the correct ranges now. */
1731 /* Thumb-1 LDR instructions cannot have negative offsets.
1732 Permissible positive offset ranges are 5-bit (for byte loads),
1733 6-bit (for halfword loads), or 7-bit (for word loads).
1734 Empirical results suggest a 7-bit anchor range gives the best
1735 overall code size. */
1736 targetm.min_anchor_offset = 0;
1737 targetm.max_anchor_offset = 127;
1739 else if (TARGET_THUMB2)
1741 /* The minimum is set such that the total size of the block
for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
divisible by eight (4344 = 8 * 543), ensuring natural spacing of anchors. */
1744 targetm.min_anchor_offset = -248;
1745 targetm.max_anchor_offset = 4095;
1748 /* V5 code we generate is completely interworking capable, so we turn off
1749 TARGET_INTERWORK here to avoid many tests later on. */
1751 /* XXX However, we must pass the right pre-processor defines to CPP
1752 or GLD can get confused. This is a hack. */
1753 if (TARGET_INTERWORK)
1754 arm_cpp_interwork = 1;
1757 target_flags &= ~MASK_INTERWORK;
1759 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1760 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1762 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1763 error ("iwmmxt abi requires an iwmmxt capable cpu");
1765 if (!global_options_set.x_arm_fpu_index)
1767 const char *target_fpu_name;
1770 #ifdef FPUTYPE_DEFAULT
1771 target_fpu_name = FPUTYPE_DEFAULT;
1773 target_fpu_name = "vfp";
1776 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1781 arm_fpu_desc = &all_fpus[arm_fpu_index];
1783 switch (arm_fpu_desc->model)
1785 case ARM_FP_MODEL_VFP:
1786 arm_fpu_attr = FPU_VFP;
1793 if (TARGET_AAPCS_BASED)
1795 if (TARGET_CALLER_INTERWORKING)
1796 error ("AAPCS does not support -mcaller-super-interworking");
1798 if (TARGET_CALLEE_INTERWORKING)
1799 error ("AAPCS does not support -mcallee-super-interworking");
1802 /* iWMMXt and NEON are incompatible. */
1803 if (TARGET_IWMMXT && TARGET_NEON)
1804 error ("iWMMXt and NEON are incompatible");
1806 /* iWMMXt unsupported under Thumb mode. */
1807 if (TARGET_THUMB && TARGET_IWMMXT)
1808 error ("iWMMXt unsupported under Thumb mode");
1810 /* __fp16 support currently assumes the core has ldrh. */
1811 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1812 sorry ("__fp16 and no ldrh");
1814 /* If soft-float is specified then don't use FPU. */
1815 if (TARGET_SOFT_FLOAT)
1816 arm_fpu_attr = FPU_NONE;
1818 if (TARGET_AAPCS_BASED)
1820 if (arm_abi == ARM_ABI_IWMMXT)
1821 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1822 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1823 && TARGET_HARD_FLOAT
1825 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1827 arm_pcs_default = ARM_PCS_AAPCS;
1831 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1832 sorry ("-mfloat-abi=hard and VFP");
1834 if (arm_abi == ARM_ABI_APCS)
1835 arm_pcs_default = ARM_PCS_APCS;
1837 arm_pcs_default = ARM_PCS_ATPCS;
1840 /* For arm2/3 there is no need to do any scheduling if we are doing
1841 software floating-point. */
1842 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1843 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1845 /* Use the cp15 method if it is available. */
1846 if (target_thread_pointer == TP_AUTO)
1848 if (arm_arch6k && !TARGET_THUMB1)
1849 target_thread_pointer = TP_CP15;
1851 target_thread_pointer = TP_SOFT;
1854 if (TARGET_HARD_TP && TARGET_THUMB1)
error ("cannot use -mtp=cp15 with 16-bit Thumb");
1857 /* Override the default structure alignment for AAPCS ABI. */
1858 if (!global_options_set.x_arm_structure_size_boundary)
1860 if (TARGET_AAPCS_BASED)
1861 arm_structure_size_boundary = 8;
1865 if (arm_structure_size_boundary != 8
1866 && arm_structure_size_boundary != 32
1867 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1869 if (ARM_DOUBLEWORD_ALIGN)
1871 "structure size boundary can only be set to 8, 32 or 64");
1873 warning (0, "structure size boundary can only be set to 8 or 32");
1874 arm_structure_size_boundary
1875 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1879 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1881 error ("RTP PIC is incompatible with Thumb");
1885 /* If stack checking is disabled, we can use r10 as the PIC register,
1886 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1887 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1889 if (TARGET_VXWORKS_RTP)
1890 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1891 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1894 if (flag_pic && TARGET_VXWORKS_RTP)
1895 arm_pic_register = 9;
1897 if (arm_pic_register_string != NULL)
1899 int pic_register = decode_reg_name (arm_pic_register_string);
1902 warning (0, "-mpic-register= is useless without -fpic");
1904 /* Prevent the user from choosing an obviously stupid PIC register. */
1905 else if (pic_register < 0 || call_used_regs[pic_register]
1906 || pic_register == HARD_FRAME_POINTER_REGNUM
1907 || pic_register == STACK_POINTER_REGNUM
1908 || pic_register >= PC_REGNUM
1909 || (TARGET_VXWORKS_RTP
1910 && (unsigned int) pic_register != arm_pic_register))
1911 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1913 arm_pic_register = pic_register;
1916 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1917 if (fix_cm3_ldrd == 2)
1919 if (arm_selected_cpu->core == cortexm3)
1925 /* Enable -munaligned-access by default for
1926 - all ARMv6 architecture-based processors
1927 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1929 Disable -munaligned-access by default for
1930 - all pre-ARMv6 architecture-based processors
1931 - ARMv6-M architecture-based processors. */
1933 if (unaligned_access == 2)
1935 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1936 unaligned_access = 1;
1938 unaligned_access = 0;
1940 else if (unaligned_access == 1
1941 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1943 warning (0, "target CPU does not support unaligned accesses");
1944 unaligned_access = 0;
1947 if (TARGET_THUMB1 && flag_schedule_insns)
1949 /* Don't warn since it's on by default in -O2. */
1950 flag_schedule_insns = 0;
1955 /* If optimizing for size, bump the number of instructions that we
1956 are prepared to conditionally execute (even on a StrongARM). */
1957 max_insns_skipped = 6;
1960 max_insns_skipped = current_tune->max_insns_skipped;
1962 /* Hot/Cold partitioning is not currently supported, since we can't
1963 handle literal pool placement in that case. */
1964 if (flag_reorder_blocks_and_partition)
1966 inform (input_location,
1967 "-freorder-blocks-and-partition not supported on this architecture");
1968 flag_reorder_blocks_and_partition = 0;
1969 flag_reorder_blocks = 1;
1973 /* Hoisting PIC address calculations more aggressively provides a small,
1974 but measurable, size reduction for PIC code. Therefore, we decrease
1975 the bar for unrestricted expression hoisting to the cost of PIC address
1976 calculation, which is 2 instructions. */
1977 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1978 global_options.x_param_values,
1979 global_options_set.x_param_values);
1981 /* ARM EABI defaults to strict volatile bitfields. */
1982 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
1983 && abi_version_at_least(2))
1984 flag_strict_volatile_bitfields = 1;
/* Enable software prefetching at -O3 for CPUs that have prefetch, where we
have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
1988 if (flag_prefetch_loop_arrays < 0
1991 && current_tune->num_prefetch_slots > 0)
1992 flag_prefetch_loop_arrays = 1;
/* Set up the parameters to be used in the prefetching algorithm. Do not
override the defaults unless we are tuning for a core whose values we have researched. */
1996 if (current_tune->num_prefetch_slots > 0)
1997 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1998 current_tune->num_prefetch_slots,
1999 global_options.x_param_values,
2000 global_options_set.x_param_values);
2001 if (current_tune->l1_cache_line_size >= 0)
2002 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2003 current_tune->l1_cache_line_size,
2004 global_options.x_param_values,
2005 global_options_set.x_param_values);
2006 if (current_tune->l1_cache_size >= 0)
2007 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2008 current_tune->l1_cache_size,
2009 global_options.x_param_values,
2010 global_options_set.x_param_values);
2012 /* Use the alternative scheduling-pressure algorithm by default. */
2013 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2014 global_options.x_param_values,
2015 global_options_set.x_param_values);
2017 /* Register global variables with the garbage collector. */
2018 arm_add_gc_roots ();
2022 arm_add_gc_roots (void)
2024 gcc_obstack_init(&minipool_obstack);
2025 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2028 /* A table of known ARM exception types.
2029 For use with the interrupt function attribute. */
2033 const char *const arg;
2034 const unsigned long return_value;
2038 static const isr_attribute_arg isr_attribute_args [] =
2040 { "IRQ", ARM_FT_ISR },
2041 { "irq", ARM_FT_ISR },
2042 { "FIQ", ARM_FT_FIQ },
2043 { "fiq", ARM_FT_FIQ },
2044 { "ABORT", ARM_FT_ISR },
2045 { "abort", ARM_FT_ISR },
2046 { "ABORT", ARM_FT_ISR },
2047 { "abort", ARM_FT_ISR },
2048 { "UNDEF", ARM_FT_EXCEPTION },
2049 { "undef", ARM_FT_EXCEPTION },
2050 { "SWI", ARM_FT_EXCEPTION },
2051 { "swi", ARM_FT_EXCEPTION },
2052 { NULL, ARM_FT_NORMAL }
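
/* Illustrative usage sketch (not part of the original source): user
   code selects one of the ISR types above through a function
   attribute whose string argument must match an isr_attribute_args
   entry exactly.  */
#if 0	/* Example only; not built.  */
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
void swi_handler (void) __attribute__ ((isr ("SWI")));
#endif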
2055 /* Returns the (interrupt) function type of the current
2056 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2058 static unsigned long
2059 arm_isr_value (tree argument)
2061 const isr_attribute_arg * ptr;
2065 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2067 /* No argument - default to IRQ. */
2068 if (argument == NULL_TREE)
2071 /* Get the value of the argument. */
2072 if (TREE_VALUE (argument) == NULL_TREE
2073 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2074 return ARM_FT_UNKNOWN;
2076 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2078 /* Check it against the list of known arguments. */
2079 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2080 if (streq (arg, ptr->arg))
2081 return ptr->return_value;
2083 /* An unrecognized interrupt type. */
2084 return ARM_FT_UNKNOWN;
2087 /* Computes the type of the current function. */
2089 static unsigned long
2090 arm_compute_func_type (void)
2092 unsigned long type = ARM_FT_UNKNOWN;
2096 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2098 /* Decide if the current function is volatile. Such functions
2099 never return, and many memory cycles can be saved by not storing
2100 register values that will never be needed again. This optimization
2101 was added to speed up context switching in a kernel application. */
2103 && (TREE_NOTHROW (current_function_decl)
2104 || !(flag_unwind_tables
2106 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2107 && TREE_THIS_VOLATILE (current_function_decl))
2108 type |= ARM_FT_VOLATILE;
2110 if (cfun->static_chain_decl != NULL)
2111 type |= ARM_FT_NESTED;
2113 attr = DECL_ATTRIBUTES (current_function_decl);
2115 a = lookup_attribute ("naked", attr);
2117 type |= ARM_FT_NAKED;
2119 a = lookup_attribute ("isr", attr);
2121 a = lookup_attribute ("interrupt", attr);
2124 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2126 type |= arm_isr_value (TREE_VALUE (a));
2131 /* Returns the type of the current function. */
2134 arm_current_func_type (void)
2136 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2137 cfun->machine->func_type = arm_compute_func_type ();
2139 return cfun->machine->func_type;
2143 arm_allocate_stack_slots_for_args (void)
2145 /* Naked functions should not allocate stack slots for arguments. */
2146 return !IS_NAKED (arm_current_func_type ());
2150 /* Output assembler code for a block containing the constant parts
2151 of a trampoline, leaving space for the variable parts.
2153 On the ARM, (if r8 is the static chain regnum, and remembering that
2154 referencing pc adds an offset of 8) the trampoline looks like:
ldr   r8, [pc, #0]
ldr   pc, [pc, #0]
.word static chain value
2158 .word function's address
2159 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2162 arm_asm_trampoline_template (FILE *f)
2166 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2167 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2169 else if (TARGET_THUMB2)
/* The Thumb-2 trampoline is similar to the ARM implementation.
Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2173 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2174 STATIC_CHAIN_REGNUM, PC_REGNUM);
2175 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2179 ASM_OUTPUT_ALIGN (f, 2);
2180 fprintf (f, "\t.code\t16\n");
2181 fprintf (f, ".Ltrampoline_start:\n");
2182 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2183 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2184 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2185 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2186 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2187 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2189 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2190 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
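
/* Illustrative sketch (not part of the original source): the memory
   image of the 32-bit (ARM/Thumb-2) trampoline emitted above.  The
   two literal words are the ones arm_trampoline_init below fills in
   at byte offsets 8 and 12; the Thumb-1 stub uses offsets 12 and 16
   instead.  */
#if 0	/* Example only; not built.  */
struct arm_trampoline_image
{
  unsigned int insn[2];		/* ldr chain, [pc] ; ldr pc, [pc]  */
  unsigned int static_chain;	/* Byte offset 8.  */
  unsigned int func_addr;	/* Byte offset 12.  */
};
#endif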
2193 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2196 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2198 rtx fnaddr, mem, a_tramp;
2200 emit_block_move (m_tramp, assemble_trampoline_template (),
2201 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2203 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2204 emit_move_insn (mem, chain_value);
2206 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2207 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2208 emit_move_insn (mem, fnaddr);
2210 a_tramp = XEXP (m_tramp, 0);
2211 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2212 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2213 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
/* Thumb trampolines should be entered in Thumb mode, so set
the bottom bit of the address. */
2220 arm_trampoline_adjust_address (rtx addr)
2223 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2224 NULL, 0, OPTAB_LIB_WIDEN);
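
/* Illustrative sketch (not part of the original source): the effect
   of the IOR above on a raw code address when the target is Thumb.  */
#if 0	/* Example only; not built.  */
static unsigned int
thumb_entry_address_example (unsigned int addr)
{
  return addr | 1;	/* Bit 0 set selects Thumb state on BX/BLX.  */
}
#endif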
2228 /* Return 1 if it is possible to return using a single instruction.
2229 If SIBLING is non-null, this is a test for a return before a sibling
2230 call. SIBLING is the call insn, so we can examine its register usage. */
2233 use_return_insn (int iscond, rtx sibling)
2236 unsigned int func_type;
2237 unsigned long saved_int_regs;
2238 unsigned HOST_WIDE_INT stack_adjust;
2239 arm_stack_offsets *offsets;
2241 /* Never use a return instruction before reload has run. */
2242 if (!reload_completed)
2245 func_type = arm_current_func_type ();
/* Naked, volatile and stack alignment functions need special
consideration. */
2249 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2252 /* So do interrupt functions that use the frame pointer and Thumb
2253 interrupt functions. */
2254 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2257 offsets = arm_get_frame_offsets ();
2258 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2260 /* As do variadic functions. */
2261 if (crtl->args.pretend_args_size
2262 || cfun->machine->uses_anonymous_args
2263 /* Or if the function calls __builtin_eh_return () */
2264 || crtl->calls_eh_return
2265 /* Or if the function calls alloca */
2266 || cfun->calls_alloca
2267 /* Or if there is a stack adjustment. However, if the stack pointer
2268 is saved on the stack, we can use a pre-incrementing stack load. */
2269 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2270 && stack_adjust == 4)))
2273 saved_int_regs = offsets->saved_regs_mask;
2275 /* Unfortunately, the insn
2277 ldmib sp, {..., sp, ...}
2279 triggers a bug on most SA-110 based devices, such that the stack
2280 pointer won't be correctly restored if the instruction takes a
2281 page fault. We work around this problem by popping r3 along with
2282 the other registers, since that is never slower than executing
2283 another instruction.
2285 We test for !arm_arch5 here, because code for any architecture
less than this could potentially be run on one of the buggy
chips. */
2288 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2290 /* Validate that r3 is a call-clobbered register (always true in
2291 the default abi) ... */
2292 if (!call_used_regs[3])
2295 /* ... that it isn't being used for a return value ... */
2296 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2299 /* ... or for a tail-call argument ... */
2302 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2304 if (find_regno_fusage (sibling, USE, 3))
2308 /* ... and that there are no call-saved registers in r0-r2
2309 (always true in the default ABI). */
2310 if (saved_int_regs & 0x7)
/* Can't be done if interworking with Thumb, and any registers have been
stacked. */
2316 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2319 /* On StrongARM, conditional returns are expensive if they aren't
2320 taken and multiple registers have been stacked. */
2321 if (iscond && arm_tune_strongarm)
2323 /* Conditional return when just the LR is stored is a simple
2324 conditional-load instruction, that's not expensive. */
2325 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2329 && arm_pic_register != INVALID_REGNUM
2330 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2334 /* If there are saved registers but the LR isn't saved, then we need
2335 two instructions for the return. */
2336 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2339 /* Can't be done if any of the VFP regs are pushed,
2340 since this also requires an insn. */
2341 if (TARGET_HARD_FLOAT && TARGET_VFP)
2342 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2343 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2346 if (TARGET_REALLY_IWMMXT)
2347 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2348 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2354 /* Return TRUE if int I is a valid immediate ARM constant. */
2357 const_ok_for_arm (HOST_WIDE_INT i)
2361 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2362 be all zero, or all one. */
2363 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2364 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2365 != ((~(unsigned HOST_WIDE_INT) 0)
2366 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2369 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2371 /* Fast return for 0 and small values. We must do this for zero, since
2372 the code below can't handle that one case. */
2373 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2376 /* Get the number of trailing zeros. */
2377 lowbit = ffs((int) i) - 1;
/* Only even shifts are allowed in ARM mode, so round down to the
2380 nearest even number. */
2384 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2389 /* Allow rotated constants in ARM mode. */
2391 && ((i & ~0xc000003f) == 0
2392 || (i & ~0xf000000f) == 0
2393 || (i & ~0xfc000003) == 0))
2400 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2403 if (i == v || i == (v | (v << 8)))
2406 /* Allow repeated pattern 0xXY00XY00. */
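
/* Illustrative sketch (not part of the original source): a direct
   restatement of the ARM-mode rule used above -- an immediate is
   valid when it is an 8-bit value rotated right by an even amount.
   The Thumb-2-only replicated patterns tested above are additional
   cases on top of this rotation rule.  */
#if 0	/* Example only; not built.  */
static int
arm_rotated_immediate_example (unsigned int val)
{
  unsigned int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate VAL left by ROT, undoing a rotate-right of ROT.  */
      unsigned int v = rot ? ((val << rot) | (val >> (32 - rot))) : val;
      if ((v & ~0xffu) == 0)
	return 1;
    }
  return 0;
}
#endif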
2416 /* Return true if I is a valid constant for the operation CODE. */
2418 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2420 if (const_ok_for_arm (i))
2426 /* See if we can use movw. */
2427 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2430 /* Otherwise, try mvn. */
2431 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2434 /* See if we can use addw or subw. */
2436 && ((i & 0xfffff000) == 0
2437 || ((-i) & 0xfffff000) == 0))
2439 /* else fall through. */
2459 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2461 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2467 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2471 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
/* Return true if I is a valid DImode constant for the operation CODE. */
2480 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2482 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2483 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2484 rtx hi = GEN_INT (hi_val);
2485 rtx lo = GEN_INT (lo_val);
2493 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
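
/* Illustrative sketch (not part of the original source): the hi/lo
   decomposition tested above, stated on plain 64-bit integers.  */
#if 0	/* Example only; not built.  */
static void
split_di_example (unsigned long long i, unsigned int *hi, unsigned int *lo)
{
  *hi = (unsigned int) ((i >> 32) & 0xffffffffu);
  *lo = (unsigned int) (i & 0xffffffffu);
}
#endif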
2500 /* Emit a sequence of insns to handle a large constant.
2501 CODE is the code of the operation required, it can be any of SET, PLUS,
2502 IOR, AND, XOR, MINUS;
2503 MODE is the mode in which the operation is being performed;
2504 VAL is the integer to operate on;
2505 SOURCE is the other operand (a register, or a null-pointer for SET);
SUBTARGETS means it is safe to create scratch registers if that will
either produce a simpler sequence or allow the values to be CSEd.
2508 Return value is the number of insns emitted. */
2510 /* ??? Tweak this for thumb2. */
2512 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2513 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2517 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2518 cond = COND_EXEC_TEST (PATTERN (insn));
2522 if (subtargets || code == SET
2523 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2524 && REGNO (target) != REGNO (source)))
2526 /* After arm_reorg has been called, we can't fix up expensive
constants by pushing them into memory, so we must synthesize
2528 them in-line, regardless of the cost. This is only likely to
2529 be more costly on chips that have load delay slots and we are
2530 compiling without running the scheduler (so no splitting
2531 occurred before the final instruction emission).
2533 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2535 if (!after_arm_reorg
2537 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2539 > (arm_constant_limit (optimize_function_for_size_p (cfun))
/* Currently SET is the only monadic value for CODE; all
the rest are dyadic. */
2546 if (TARGET_USE_MOVT)
2547 arm_emit_movpair (target, GEN_INT (val));
2549 emit_set_insn (target, GEN_INT (val));
2555 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2557 if (TARGET_USE_MOVT)
2558 arm_emit_movpair (temp, GEN_INT (val));
2560 emit_set_insn (temp, GEN_INT (val));
/* For MINUS, the value is the minuend, i.e. we compute VAL - SOURCE,
since we never have subtraction of a constant. */
2565 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2567 emit_set_insn (target,
2568 gen_rtx_fmt_ee (code, mode, source, temp));
2574 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
ARM/THUMB2 immediates and add up to VAL.
The function's return value gives the number of insns required. */
2582 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2583 struct four_ints *return_sequence)
2585 int best_consecutive_zeros = 0;
2589 struct four_ints tmp_sequence;
2591 /* If we aren't targeting ARM, the best place to start is always at
the bottom; otherwise look more closely. */
2595 for (i = 0; i < 32; i += 2)
2597 int consecutive_zeros = 0;
2599 if (!(val & (3 << i)))
2601 while ((i < 32) && !(val & (3 << i)))
2603 consecutive_zeros += 2;
2606 if (consecutive_zeros > best_consecutive_zeros)
2608 best_consecutive_zeros = consecutive_zeros;
2609 best_start = i - consecutive_zeros;
2616 /* So long as it won't require any more insns to do so, it's
2617 desirable to emit a small constant (in bits 0...9) in the last
2618 insn. This way there is more chance that it can be combined with
2619 a later addressing insn to form a pre-indexed load or store
2620 operation. Consider:
2622 *((volatile int *)0xe0000100) = 1;
2623 *((volatile int *)0xe0000110) = 2;
We want this to wind up as:

mov rA, #0xe0000000
mov rB, #1
str rB, [rA, #0x100]
mov rB, #2
str rB, [rA, #0x110]
2633 rather than having to synthesize both large constants from scratch.
2635 Therefore, we calculate how many insns would be required to emit
2636 the constant starting from `best_start', and also starting from
2637 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2638 yield a shorter sequence, we may as well use zero. */
2639 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2641 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2643 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2644 if (insns2 <= insns1)
2646 *return_sequence = tmp_sequence;
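
/* Illustrative sketch (not part of the original source): a greedy
   ARM-mode split of VAL into 8-bit fields on even bit boundaries --
   the flavour of sequence whose length the search above minimizes.
   E.g. 0x00f000ff splits into 0x000000ff and 0x00f00000: two insns.  */
#if 0	/* Example only; not built.  */
static int
greedy_arm_split_example (unsigned int val, unsigned int out[4])
{
  int n = 0, i = 0;

  while (val != 0 && n < 4)
    {
      unsigned int chunk;

      /* Skip zero bits two at a time (ARM rotations are even).  */
      while ((val & (3u << i)) == 0)
	i += 2;
      chunk = val & (0xffu << i);	/* Grab an 8-bit window.  */
      out[n++] = chunk;
      val &= ~chunk;
    }
  return n;	/* Number of 8-bit immediates used.  */
}
#endif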
2654 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2656 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2657 struct four_ints *return_sequence, int i)
2659 int remainder = val & 0xffffffff;
/* Try to find a way of doing the job in either two or three
instructions.
2665 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2666 location. We start at position I. This may be the MSB, or
optimal_immediate_sequence may have positioned it at the largest block
2668 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2669 wrapping around to the top of the word when we drop off the bottom.
2670 In the worst case this code should produce no more than four insns.
2672 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
constants, shifted to any arbitrary location. We should always start
at position 0. */
2678 unsigned int b1, b2, b3, b4;
2679 unsigned HOST_WIDE_INT result;
2682 gcc_assert (insns < 4);
2687 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2688 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2691 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2692 /* We can use addw/subw for the last 12 bits. */
2696 /* Use an 8-bit shifted/rotated immediate. */
2700 result = remainder & ((0x0ff << end)
2701 | ((i < end) ? (0xff >> (32 - end))
/* ARM allows rotates by a multiple of two. Thumb-2 allows
2709 arbitrary shifts. */
2710 i -= TARGET_ARM ? 2 : 1;
/* Next, see if we can do a better job with a thumb2 replicated
constant.

We do it this way around to catch the cases like 0x01F001E0 where
two 8-bit immediates would work, but a replicated constant would
not.
2721 TODO: 16-bit constants that don't clear all the bits, but still win.
2722 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2725 b1 = (remainder & 0xff000000) >> 24;
2726 b2 = (remainder & 0x00ff0000) >> 16;
2727 b3 = (remainder & 0x0000ff00) >> 8;
2728 b4 = remainder & 0xff;
2732 /* The 8-bit immediate already found clears b1 (and maybe b2),
2733 but must leave b3 and b4 alone. */
2735 /* First try to find a 32-bit replicated constant that clears
2736 almost everything. We can assume that we can't do it in one,
2737 or else we wouldn't be here. */
2738 unsigned int tmp = b1 & b2 & b3 & b4;
2739 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2741 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2742 + (tmp == b3) + (tmp == b4);
2744 && (matching_bytes >= 3
2745 || (matching_bytes == 2
2746 && const_ok_for_op (remainder & ~tmp2, code))))
2748 /* At least 3 of the bytes match, and the fourth has at
2749 least as many bits set, or two of the bytes match
2750 and it will only require one more insn to finish. */
2758 /* Second, try to find a 16-bit replicated constant that can
2759 leave three of the bytes clear. If b2 or b4 is already
2760 zero, then we can. If the 8-bit from above would not
2761 clear b2 anyway, then we still win. */
2762 else if (b1 == b3 && (!b2 || !b4
2763 || (remainder & 0x00ff0000 & ~result)))
2765 result = remainder & 0xff00ff00;
2771 /* The 8-bit immediate already found clears b2 (and maybe b3)
and we don't get here unless b1 is already clear, but it will
2773 leave b4 unchanged. */
2775 /* If we can clear b2 and b4 at once, then we win, since the
2776 8-bits couldn't possibly reach that far. */
2779 result = remainder & 0x00ff00ff;
2785 return_sequence->i[insns++] = result;
2786 remainder &= ~result;
2788 if (code == SET || code == MINUS)
2796 /* Emit an instruction with the indicated PATTERN. If COND is
non-NULL, conditionalize the execution of the instruction on COND
being true. */
2801 emit_constant_insn (rtx cond, rtx pattern)
2804 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2805 emit_insn (pattern);
/* As above, but extra parameter GENERATE which, if clear, suppresses
RTL generation. */
2812 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2813 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2818 int final_invert = 0;
2820 int set_sign_bit_copies = 0;
2821 int clear_sign_bit_copies = 0;
2822 int clear_zero_bit_copies = 0;
2823 int set_zero_bit_copies = 0;
2824 int insns = 0, neg_insns, inv_insns;
2825 unsigned HOST_WIDE_INT temp1, temp2;
2826 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2827 struct four_ints *immediates;
2828 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2830 /* Find out which operations are safe for a given CODE. Also do a quick
check for degenerate cases; these can occur when DImode operations
are split. */
2844 if (remainder == 0xffffffff)
2847 emit_constant_insn (cond,
2848 gen_rtx_SET (VOIDmode, target,
2849 GEN_INT (ARM_SIGN_EXTEND (val))));
2855 if (reload_completed && rtx_equal_p (target, source))
2859 emit_constant_insn (cond,
2860 gen_rtx_SET (VOIDmode, target, source));
2869 emit_constant_insn (cond,
2870 gen_rtx_SET (VOIDmode, target, const0_rtx));
2873 if (remainder == 0xffffffff)
2875 if (reload_completed && rtx_equal_p (target, source))
2878 emit_constant_insn (cond,
2879 gen_rtx_SET (VOIDmode, target, source));
2888 if (reload_completed && rtx_equal_p (target, source))
2891 emit_constant_insn (cond,
2892 gen_rtx_SET (VOIDmode, target, source));
2896 if (remainder == 0xffffffff)
2899 emit_constant_insn (cond,
2900 gen_rtx_SET (VOIDmode, target,
2901 gen_rtx_NOT (mode, source)));
2908 /* We treat MINUS as (val - source), since (source - val) is always
2909 passed as (source + (-val)). */
2913 emit_constant_insn (cond,
2914 gen_rtx_SET (VOIDmode, target,
2915 gen_rtx_NEG (mode, source)));
2918 if (const_ok_for_arm (val))
2921 emit_constant_insn (cond,
2922 gen_rtx_SET (VOIDmode, target,
2923 gen_rtx_MINUS (mode, GEN_INT (val),
2934 /* If we can do it in one insn get out quickly. */
2935 if (const_ok_for_op (val, code))
2938 emit_constant_insn (cond,
2939 gen_rtx_SET (VOIDmode, target,
2941 ? gen_rtx_fmt_ee (code, mode, source,
/* Calculate a few attributes that may be useful for specific
optimizations. */
2949 /* Count number of leading zeros. */
2950 for (i = 31; i >= 0; i--)
2952 if ((remainder & (1 << i)) == 0)
2953 clear_sign_bit_copies++;
2958 /* Count number of leading 1's. */
2959 for (i = 31; i >= 0; i--)
2961 if ((remainder & (1 << i)) != 0)
2962 set_sign_bit_copies++;
/* Count number of trailing zeros. */
2968 for (i = 0; i <= 31; i++)
2970 if ((remainder & (1 << i)) == 0)
2971 clear_zero_bit_copies++;
2976 /* Count number of trailing 1's. */
2977 for (i = 0; i <= 31; i++)
2979 if ((remainder & (1 << i)) != 0)
2980 set_zero_bit_copies++;
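
/* Worked example (not from the original source): for
   remainder == 0x0001f000 (bits 12..16 set) the four loops above
   yield clear_sign_bit_copies == 15, set_sign_bit_copies == 0,
   clear_zero_bit_copies == 12 and set_zero_bit_copies == 0.  */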
2988 /* See if we can do this by sign_extending a constant that is known
to be negative. This is a good way of doing it, since the shift
2990 may well merge into a subsequent insn. */
2991 if (set_sign_bit_copies > 1)
2993 if (const_ok_for_arm
2994 (temp1 = ARM_SIGN_EXTEND (remainder
2995 << (set_sign_bit_copies - 1))))
2999 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3000 emit_constant_insn (cond,
3001 gen_rtx_SET (VOIDmode, new_src,
3003 emit_constant_insn (cond,
3004 gen_ashrsi3 (target, new_src,
3005 GEN_INT (set_sign_bit_copies - 1)));
/* For an inverted constant, we will need to set the low bits;
these will be shifted out of harm's way. */
3010 these will be shifted out of harm's way. */
3011 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3012 if (const_ok_for_arm (~temp1))
3016 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3017 emit_constant_insn (cond,
3018 gen_rtx_SET (VOIDmode, new_src,
3020 emit_constant_insn (cond,
3021 gen_ashrsi3 (target, new_src,
3022 GEN_INT (set_sign_bit_copies - 1)));
3028 /* See if we can calculate the value as the difference between two
3029 valid immediates. */
3030 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3032 int topshift = clear_sign_bit_copies & ~1;
3034 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3035 & (0xff000000 >> topshift));
3037 /* If temp1 is zero, then that means the 9 most significant
3038 bits of remainder were 1 and we've caused it to overflow.
3039 When topshift is 0 we don't need to do anything since we
3040 can borrow from 'bit 32'. */
3041 if (temp1 == 0 && topshift != 0)
3042 temp1 = 0x80000000 >> (topshift - 1);
3044 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3046 if (const_ok_for_arm (temp2))
3050 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3051 emit_constant_insn (cond,
3052 gen_rtx_SET (VOIDmode, new_src,
3054 emit_constant_insn (cond,
3055 gen_addsi3 (target, new_src,
3063 /* See if we can generate this by setting the bottom (or the top)
3064 16 bits, and then shifting these into the other half of the
3065 word. We only look for the simplest cases, to do more would cost
3066 too much. Be careful, however, not to generate this when the
3067 alternative would take fewer insns. */
3068 if (val & 0xffff0000)
3070 temp1 = remainder & 0xffff0000;
3071 temp2 = remainder & 0x0000ffff;
3073 /* Overlaps outside this range are best done using other methods. */
3074 for (i = 9; i < 24; i++)
3076 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3077 && !const_ok_for_arm (temp2))
3079 rtx new_src = (subtargets
3080 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3082 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3083 source, subtargets, generate);
3091 gen_rtx_ASHIFT (mode, source,
3098 /* Don't duplicate cases already considered. */
3099 for (i = 17; i < 24; i++)
3101 if (((temp1 | (temp1 >> i)) == remainder)
3102 && !const_ok_for_arm (temp1))
3104 rtx new_src = (subtargets
3105 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3107 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3108 source, subtargets, generate);
3113 gen_rtx_SET (VOIDmode, target,
3116 gen_rtx_LSHIFTRT (mode, source,
3127 /* If we have IOR or XOR, and the constant can be loaded in a
3128 single instruction, and we can find a temporary to put it in,
3129 then this can be done in two instructions instead of 3-4. */
3131 /* TARGET can't be NULL if SUBTARGETS is 0 */
3132 || (reload_completed && !reg_mentioned_p (target, source)))
3134 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3138 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3140 emit_constant_insn (cond,
3141 gen_rtx_SET (VOIDmode, sub,
3143 emit_constant_insn (cond,
3144 gen_rtx_SET (VOIDmode, target,
3145 gen_rtx_fmt_ee (code, mode,
x = y | constant (which is composed of set_sign_bit_copies of leading 1s
and the remainder 0s for e.g. 0xfff00000)
3158 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3160 This can be done in 2 instructions by using shifts with mov or mvn.
e.g. for
x = x | 0xfff
mvn r0, r0, asl #12
mvn r0, r0, lsr #12 */
3166 if (set_sign_bit_copies > 8
3167 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3171 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3172 rtx shift = GEN_INT (set_sign_bit_copies);
3176 gen_rtx_SET (VOIDmode, sub,
3178 gen_rtx_ASHIFT (mode,
3183 gen_rtx_SET (VOIDmode, target,
3185 gen_rtx_LSHIFTRT (mode, sub,
3192 x = y | constant (which has set_zero_bit_copies number of trailing ones).
Transform this to
x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
For example, r0 = r0 | 0xfff
mvn r0, r0, lsr #12
mvn r0, r0, asl #12 */
3201 if (set_zero_bit_copies > 8
3202 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3206 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3207 rtx shift = GEN_INT (set_zero_bit_copies);
3211 gen_rtx_SET (VOIDmode, sub,
3213 gen_rtx_LSHIFTRT (mode,
3218 gen_rtx_SET (VOIDmode, target,
3220 gen_rtx_ASHIFT (mode, sub,
3226 /* This will never be reached for Thumb2 because orn is a valid
3227 instruction. This is for Thumb1 and the ARM 32 bit cases.
x = y | constant (such that ~constant is a valid constant)
Transform this to
x = ~(~y & ~constant). */
3233 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3237 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3238 emit_constant_insn (cond,
3239 gen_rtx_SET (VOIDmode, sub,
3240 gen_rtx_NOT (mode, source)));
3243 sub = gen_reg_rtx (mode);
3244 emit_constant_insn (cond,
3245 gen_rtx_SET (VOIDmode, sub,
3246 gen_rtx_AND (mode, source,
3248 emit_constant_insn (cond,
3249 gen_rtx_SET (VOIDmode, target,
3250 gen_rtx_NOT (mode, sub)));
/* See if two shifts will do 2 or more insns' worth of work. */
3258 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3260 HOST_WIDE_INT shift_mask = ((0xffffffff
3261 << (32 - clear_sign_bit_copies))
3264 if ((remainder | shift_mask) != 0xffffffff)
3268 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3269 insns = arm_gen_constant (AND, mode, cond,
3270 remainder | shift_mask,
3271 new_src, source, subtargets, 1);
3276 rtx targ = subtargets ? NULL_RTX : target;
3277 insns = arm_gen_constant (AND, mode, cond,
3278 remainder | shift_mask,
3279 targ, source, subtargets, 0);
3285 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3286 rtx shift = GEN_INT (clear_sign_bit_copies);
3288 emit_insn (gen_ashlsi3 (new_src, source, shift));
3289 emit_insn (gen_lshrsi3 (target, new_src, shift));
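
/* Worked example (not from the original source): with
   clear_sign_bit_copies == 16, an AND with 0x0000ffff becomes

	mov	r0, r0, asl #16
	mov	r0, r0, lsr #16

   i.e. two shifts rather than synthesizing the mask constant.  */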
3295 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3297 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3299 if ((remainder | shift_mask) != 0xffffffff)
3303 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3305 insns = arm_gen_constant (AND, mode, cond,
3306 remainder | shift_mask,
3307 new_src, source, subtargets, 1);
3312 rtx targ = subtargets ? NULL_RTX : target;
3314 insns = arm_gen_constant (AND, mode, cond,
3315 remainder | shift_mask,
3316 targ, source, subtargets, 0);
3322 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3323 rtx shift = GEN_INT (clear_zero_bit_copies);
3325 emit_insn (gen_lshrsi3 (new_src, source, shift));
3326 emit_insn (gen_ashlsi3 (target, new_src, shift));
3338 /* Calculate what the instruction sequences would be if we generated it
3339 normally, negated, or inverted. */
3341 /* AND cannot be split into multiple insns, so invert and use BIC. */
3344 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3347 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3352 if (can_invert || final_invert)
3353 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3358 immediates = &pos_immediates;
3360 /* Is the negated immediate sequence more efficient? */
3361 if (neg_insns < insns && neg_insns <= inv_insns)
3364 immediates = &neg_immediates;
3369 /* Is the inverted immediate sequence more efficient?
3370 We must allow for an extra NOT instruction for XOR operations, although
3371 there is some chance that the final 'mvn' will get optimized later. */
3372 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3375 immediates = &inv_immediates;
3383 /* Now output the chosen sequence as instructions. */
3386 for (i = 0; i < insns; i++)
3388 rtx new_src, temp1_rtx;
3390 temp1 = immediates->i[i];
3392 if (code == SET || code == MINUS)
3393 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3394 else if ((final_invert || i < (insns - 1)) && subtargets)
3395 new_src = gen_reg_rtx (mode);
3401 else if (can_negate)
3404 temp1 = trunc_int_for_mode (temp1, mode);
3405 temp1_rtx = GEN_INT (temp1);
3409 else if (code == MINUS)
3410 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3412 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3414 emit_constant_insn (cond,
3415 gen_rtx_SET (VOIDmode, new_src,
3421 can_negate = can_invert;
3425 else if (code == MINUS)
3433 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3434 gen_rtx_NOT (mode, source)));
3441 /* Canonicalize a comparison so that we are more likely to recognize it.
3442 This can be done for a few constant compares, where we can make the
3443 immediate value easier to load. */
3446 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3448 enum machine_mode mode;
3449 unsigned HOST_WIDE_INT i, maxval;
3451 mode = GET_MODE (*op0);
3452 if (mode == VOIDmode)
3453 mode = GET_MODE (*op1);
3455 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3457 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3458 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3459 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3460 for GTU/LEU in Thumb mode. */
3465 if (code == GT || code == LE
3466 || (!TARGET_ARM && (code == GTU || code == LEU)))
/* Missing comparison. First try to use an available
comparison. */
3470 if (GET_CODE (*op1) == CONST_INT)
3478 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3480 *op1 = GEN_INT (i + 1);
3481 return code == GT ? GE : LT;
3486 if (i != ~((unsigned HOST_WIDE_INT) 0)
3487 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3489 *op1 = GEN_INT (i + 1);
3490 return code == GTU ? GEU : LTU;
3498 /* If that did not work, reverse the condition. */
3502 return swap_condition (code);
3508 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3509 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3510 to facilitate possible combining with a cmp into 'ands'. */
3512 && GET_CODE (*op0) == ZERO_EXTEND
3513 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3514 && GET_MODE (XEXP (*op0, 0)) == QImode
3515 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3516 && subreg_lowpart_p (XEXP (*op0, 0))
3517 && *op1 == const0_rtx)
3518 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3521 /* Comparisons smaller than DImode. Only adjust comparisons against
3522 an out-of-range constant. */
3523 if (GET_CODE (*op1) != CONST_INT
3524 || const_ok_for_arm (INTVAL (*op1))
3525 || const_ok_for_arm (- INTVAL (*op1)))
3539 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3541 *op1 = GEN_INT (i + 1);
3542 return code == GT ? GE : LT;
3549 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3551 *op1 = GEN_INT (i - 1);
3552 return code == GE ? GT : LE;
3558 if (i != ~((unsigned HOST_WIDE_INT) 0)
3559 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3561 *op1 = GEN_INT (i + 1);
3562 return code == GTU ? GEU : LTU;
3569 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3571 *op1 = GEN_INT (i - 1);
3572 return code == GEU ? GTU : LEU;
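
/* Illustrative sketch (not part of the original source): the GT/GE
   adjustment used above, stated on plain integers -- for any I below
   the type maximum, (x > I) == (x >= I + 1), so an awkward constant
   can be traded for an encodable one.  */
#if 0	/* Example only; not built.  */
static int
gt_as_ge_example (int x)
{
  /* Same as x > 0xffff: 0x10000 is a valid rotated immediate,
     while 0xffff is not.  */
  return x >= 0x10000;
}
#endif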
3584 /* Define how to find the value returned by a function. */
3587 arm_function_value(const_tree type, const_tree func,
3588 bool outgoing ATTRIBUTE_UNUSED)
3590 enum machine_mode mode;
3591 int unsignedp ATTRIBUTE_UNUSED;
3592 rtx r ATTRIBUTE_UNUSED;
3594 mode = TYPE_MODE (type);
3596 if (TARGET_AAPCS_BASED)
3597 return aapcs_allocate_return_reg (mode, type, func);
3599 /* Promote integer types. */
3600 if (INTEGRAL_TYPE_P (type))
3601 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
/* Promote small structs returned in a register to full-word size
for big-endian AAPCS. */
3605 if (arm_return_in_msb (type))
3607 HOST_WIDE_INT size = int_size_in_bytes (type);
3608 if (size % UNITS_PER_WORD != 0)
3610 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3611 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3615 return arm_libcall_value_1 (mode);
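
/* Illustrative sketch (not part of the original source): the size
   round-up performed above for big-endian AAPCS, on plain ints.  */
#if 0	/* Example only; not built.  */
static int
round_up_to_word_example (int size, int word)
{
  if (size % word != 0)
    size += word - size % word;
  return size;	/* E.g. 5 bytes -> 8 when word == 4.  */
}
#endif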
3619 libcall_eq (const void *p1, const void *p2)
3621 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3625 libcall_hash (const void *p1)
3627 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3631 add_libcall (htab_t htab, rtx libcall)
3633 *htab_find_slot (htab, libcall, INSERT) = libcall;
3637 arm_libcall_uses_aapcs_base (const_rtx libcall)
3639 static bool init_done = false;
3640 static htab_t libcall_htab;
3646 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3648 add_libcall (libcall_htab,
3649 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3650 add_libcall (libcall_htab,
3651 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3652 add_libcall (libcall_htab,
3653 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3654 add_libcall (libcall_htab,
3655 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3657 add_libcall (libcall_htab,
3658 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3659 add_libcall (libcall_htab,
3660 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3661 add_libcall (libcall_htab,
3662 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3663 add_libcall (libcall_htab,
3664 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3666 add_libcall (libcall_htab,
3667 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3668 add_libcall (libcall_htab,
3669 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3670 add_libcall (libcall_htab,
3671 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3672 add_libcall (libcall_htab,
3673 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3674 add_libcall (libcall_htab,
3675 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3676 add_libcall (libcall_htab,
3677 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3678 add_libcall (libcall_htab,
3679 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3680 add_libcall (libcall_htab,
3681 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3683 /* Values from double-precision helper functions are returned in core
3684 registers if the selected core only supports single-precision
3685 arithmetic, even if we are using the hard-float ABI. The same is
3686 true for single-precision helpers, but we will never be using the
3687 hard-float ABI on a CPU which doesn't support single-precision
3688 operations in hardware. */
3689 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3690 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3691 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3692 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3693 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3694 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3695 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3696 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3697 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3698 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3699 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3700 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3702 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3706 return libcall && htab_find (libcall_htab, libcall) != NULL;
3710 arm_libcall_value_1 (enum machine_mode mode)
3712 if (TARGET_AAPCS_BASED)
3713 return aapcs_libcall_value (mode);
3714 else if (TARGET_IWMMXT_ABI
3715 && arm_vector_mode_supported_p (mode))
3716 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3718 return gen_rtx_REG (mode, ARG_REGISTER (1));
3721 /* Define how to find the value returned by a library function
3722 assuming the value has mode MODE. */
3725 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3727 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3728 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3730 /* The following libcalls return their result in integer registers,
3731 even though they return a floating point value. */
3732 if (arm_libcall_uses_aapcs_base (libcall))
3733 return gen_rtx_REG (mode, ARG_REGISTER(1));
3737 return arm_libcall_value_1 (mode);
3740 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3743 arm_function_value_regno_p (const unsigned int regno)
3745 if (regno == ARG_REGISTER (1)
3747 && TARGET_AAPCS_BASED
3749 && TARGET_HARD_FLOAT
3750 && regno == FIRST_VFP_REGNUM)
3751 || (TARGET_IWMMXT_ABI
3752 && regno == FIRST_IWMMXT_REGNUM))
3758 /* Determine the amount of memory needed to store the possible return
3759 registers of an untyped call. */
3761 arm_apply_result_size (void)
3767 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
3769 if (TARGET_IWMMXT_ABI)
3776 /* Decide whether TYPE should be returned in memory (true)
or in a register (false). FNTYPE is the type of the function making
the call. */
3780 arm_return_in_memory (const_tree type, const_tree fntype)
3784 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3786 if (TARGET_AAPCS_BASED)
/* Simple, non-aggregate types (i.e. not including vectors and
3789 complex) are always returned in a register (or registers).
3790 We don't care about which register here, so we can short-cut
3791 some of the detail. */
3792 if (!AGGREGATE_TYPE_P (type)
3793 && TREE_CODE (type) != VECTOR_TYPE
3794 && TREE_CODE (type) != COMPLEX_TYPE)
/* Any return value that is no larger than one word can be
returned in r0. */
3799 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3802 /* Check any available co-processors to see if they accept the
3803 type as a register candidate (VFP, for example, can return
3804 some aggregates in consecutive registers). These aren't
3805 available if the call is variadic. */
3806 if (aapcs_select_return_coproc (type, fntype) >= 0)
3809 /* Vector values should be returned using ARM registers, not
3810 memory (unless they're over 16 bytes, which will break since
3811 we only have four call-clobbered registers to play with). */
3812 if (TREE_CODE (type) == VECTOR_TYPE)
3813 return (size < 0 || size > (4 * UNITS_PER_WORD));
3815 /* The rest go in memory. */
3819 if (TREE_CODE (type) == VECTOR_TYPE)
3820 return (size < 0 || size > (4 * UNITS_PER_WORD));
if (!AGGREGATE_TYPE_P (type)
&& (TREE_CODE (type) != VECTOR_TYPE))
3824 /* All simple types are returned in registers. */
3827 if (arm_abi != ARM_ABI_APCS)
3829 /* ATPCS and later return aggregate types in memory only if they are
3830 larger than a word (or are variable size). */
3831 return (size < 0 || size > UNITS_PER_WORD);
3834 /* For the arm-wince targets we choose to be compatible with Microsoft's
3835 ARM and Thumb compilers, which always return aggregates in memory. */
3837 /* All structures/unions bigger than one word are returned in memory.
3838 Also catch the case where int_size_in_bytes returns -1. In this case
3839 the aggregate is either huge or of variable size, and in either case
3840 we will want to return it via memory and not in a register. */
3841 if (size < 0 || size > UNITS_PER_WORD)
3844 if (TREE_CODE (type) == RECORD_TYPE)
3848 /* For a struct the APCS says that we only return in a register
3849 if the type is 'integer like' and every addressable element
3850 has an offset of zero. For practical purposes this means
that the structure can have at most one non-bit-field element
3852 and that this element must be the first one in the structure. */
/* Find the first field, ignoring non-FIELD_DECL things which will
have been created by C++. */
3856 for (field = TYPE_FIELDS (type);
3857 field && TREE_CODE (field) != FIELD_DECL;
3858 field = DECL_CHAIN (field))
3862 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3864 /* Check that the first field is valid for returning in a register. */
3866 /* ... Floats are not allowed */
3867 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3870 /* ... Aggregates that are not themselves valid for returning in
3871 a register are not allowed. */
3872 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3875 /* Now check the remaining fields, if any. Only bitfields are allowed,
3876 since they are not addressable. */
3877 for (field = DECL_CHAIN (field);
3879 field = DECL_CHAIN (field))
3881 if (TREE_CODE (field) != FIELD_DECL)
3884 if (!DECL_BIT_FIELD_TYPE (field))
3891 if (TREE_CODE (type) == UNION_TYPE)
3895 /* Unions can be returned in registers if every element is
3896 integral, or can be returned in an integer register. */
3897 for (field = TYPE_FIELDS (type);
3899 field = DECL_CHAIN (field))
3901 if (TREE_CODE (field) != FIELD_DECL)
3904 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3907 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3913 #endif /* not ARM_WINCE */
3915 /* Return all other types in memory. */
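
/* Illustrative usage sketch (not part of the original source): how
   the APCS record rules above classify some simple structures on a
   32-bit word target.  */
#if 0	/* Example only; not built.  */
struct reg_ok  { int x; };	/* One word, integer-like: register.  */
struct mem_fp  { float f; };	/* First field is a float: memory.  */
struct mem_big { int x, y; };	/* Wider than one word: memory.  */
#endif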
3919 const struct pcs_attribute_arg
3923 } pcs_attribute_args[] =
3925 {"aapcs", ARM_PCS_AAPCS},
3926 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3928 /* We could recognize these, but changes would be needed elsewhere
3929 * to implement them. */
3930 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3931 {"atpcs", ARM_PCS_ATPCS},
3932 {"apcs", ARM_PCS_APCS},
3934 {NULL, ARM_PCS_UNKNOWN}
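
/* Illustrative usage sketch (not part of the original source): the
   strings recognized above come from the "pcs" function attribute,
   matched by arm_pcs_from_attribute below.  */
#if 0	/* Example only; not built.  */
double vfp_call (double) __attribute__ ((pcs ("aapcs-vfp")));
#endif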
3938 arm_pcs_from_attribute (tree attr)
3940 const struct pcs_attribute_arg *ptr;
3943 /* Get the value of the argument. */
3944 if (TREE_VALUE (attr) == NULL_TREE
3945 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3946 return ARM_PCS_UNKNOWN;
3948 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3950 /* Check it against the list of known arguments. */
3951 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3952 if (streq (arg, ptr->arg))
/* An unrecognized PCS variant. */
3956 return ARM_PCS_UNKNOWN;
3959 /* Get the PCS variant to use for this call. TYPE is the function's type
specification, DECL is the specific declaration. DECL may be null if
3961 the call could be indirect or if this is a library call. */
3963 arm_get_pcs_model (const_tree type, const_tree decl)
3965 bool user_convention = false;
3966 enum arm_pcs user_pcs = arm_pcs_default;
3971 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3974 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3975 user_convention = true;
3978 if (TARGET_AAPCS_BASED)
3980 /* Detect varargs functions. These always use the base rules
(no argument is ever a candidate for a co-processor
register). */
3983 bool base_rules = stdarg_p (type);
3985 if (user_convention)
3987 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3988 sorry ("non-AAPCS derived PCS variant");
3989 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3990 error ("variadic functions must use the base AAPCS variant");
3994 return ARM_PCS_AAPCS;
3995 else if (user_convention)
3997 else if (decl && flag_unit_at_a_time)
3999 /* Local functions never leak outside this compilation unit,
so we are free to use whatever conventions are
appropriate. */
4002 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4003 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4005 return ARM_PCS_AAPCS_LOCAL;
4008 else if (user_convention && user_pcs != arm_pcs_default)
4009 sorry ("PCS variant");
4011 /* For everything else we use the target's default. */
4012 return arm_pcs_default;
4017 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4018 const_tree fntype ATTRIBUTE_UNUSED,
4019 rtx libcall ATTRIBUTE_UNUSED,
4020 const_tree fndecl ATTRIBUTE_UNUSED)
4022 /* Record the unallocated VFP registers. */
4023 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4024 pcum->aapcs_vfp_reg_alloc = 0;
4027 /* Walk down the type tree of TYPE counting consecutive base elements.
4028 If *MODEP is VOIDmode, then set it to the first valid floating point
4029 type. If a non-floating point type is found, or if a floating point
4030 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4031 otherwise return the count in the sub-tree. */
4033 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4035 enum machine_mode mode;
4038 switch (TREE_CODE (type))
4041 mode = TYPE_MODE (type);
4042 if (mode != DFmode && mode != SFmode)
4045 if (*modep == VOIDmode)
4054 mode = TYPE_MODE (TREE_TYPE (type));
4055 if (mode != DFmode && mode != SFmode)
4058 if (*modep == VOIDmode)
4067 /* Use V2SImode and V4SImode as representatives of all 64-bit
4068 and 128-bit vector types, whether or not those modes are
4069 supported with the present options. */
4070 size = int_size_in_bytes (type);
4083 if (*modep == VOIDmode)
4086 /* Vector modes are considered to be opaque: two vectors are
4087 equivalent for the purposes of being homogeneous aggregates
4088 if they are the same size. */
4097 tree index = TYPE_DOMAIN (type);
4099 /* Can't handle incomplete types. */
4100 if (!COMPLETE_TYPE_P(type))
4103 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4106 || !TYPE_MAX_VALUE (index)
4107 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4108 || !TYPE_MIN_VALUE (index)
4109 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4113 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4114 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4116 /* There must be no padding. */
4117 if (!host_integerp (TYPE_SIZE (type), 1)
4118 || (tree_low_cst (TYPE_SIZE (type), 1)
4119 != count * GET_MODE_BITSIZE (*modep)))
4131 /* Can't handle incomplete types. */
4132 if (!COMPLETE_TYPE_P(type))
4135 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4137 if (TREE_CODE (field) != FIELD_DECL)
4140 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4146 /* There must be no padding. */
4147 if (!host_integerp (TYPE_SIZE (type), 1)
4148 || (tree_low_cst (TYPE_SIZE (type), 1)
4149 != count * GET_MODE_BITSIZE (*modep)))
4156 case QUAL_UNION_TYPE:
4158 /* These aren't very interesting except in a degenerate case. */
4163 /* Can't handle incomplete types. */
4164 if (!COMPLETE_TYPE_P(type))
4167 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4169 if (TREE_CODE (field) != FIELD_DECL)
4172 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4175 count = count > sub_count ? count : sub_count;
4178 /* There must be no padding. */
4179 if (!host_integerp (TYPE_SIZE (type), 1)
4180 || (tree_low_cst (TYPE_SIZE (type), 1)
4181 != count * GET_MODE_BITSIZE (*modep)))
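/* Worked example (hypothetical type; assumes the elided case labels
   follow the usual REAL_TYPE/RECORD_TYPE structure):

       struct quad { float x, y, z, w; };

   Every field is SFmode, so *MODEP settles on SFmode and the record
   case returns 4: a homogeneous aggregate eligible for s0-s3 under
   the VFP PCS. Mixing a float field with a double one fails the
   *MODEP match and yields -1. */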
4194 /* Return true if PCS_VARIANT should use VFP registers. */
4196 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4198 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4200 static bool seen_thumb1_vfp = false;
4202 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4204 sorry ("Thumb-1 hard-float VFP ABI");
4205 /* sorry() is not immediately fatal, so only display this once. */
4206 seen_thumb1_vfp = true;
4212 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4215 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4216 (TARGET_VFP_DOUBLE || !is_double));
4219 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4220 suitable for passing or returning in VFP registers for the PCS
4221 variant selected. If it is, then *BASE_MODE is updated to contain
4222 a machine mode describing each element of the argument's type and
4223 *COUNT to hold the number of such elements. */
4225 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4226 enum machine_mode mode, const_tree type,
4227 enum machine_mode *base_mode, int *count)
4229 enum machine_mode new_mode = VOIDmode;
4231 /* If we have the type information, prefer that to working things
4232 out from the mode. */
4235 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4237 if (ag_count > 0 && ag_count <= 4)
4242 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4243 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4244 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4249 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4252 new_mode = (mode == DCmode ? DFmode : SFmode);
4258 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4261 *base_mode = new_mode;
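/* For instance, a _Complex double argument arrives here in DCmode:
   the MODE_COMPLEX_FLOAT branch above picks DFmode as the base mode
   and (in the elided lines) a count of 2, so the value can occupy a
   pair of D registers under the VFP variant. */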
4266 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4267 enum machine_mode mode, const_tree type)
4269 int count ATTRIBUTE_UNUSED;
4270 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4272 if (!use_vfp_abi (pcs_variant, false))
4274 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4279 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4282 if (!use_vfp_abi (pcum->pcs_variant, false))
4285 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4286 &pcum->aapcs_vfp_rmode,
4287 &pcum->aapcs_vfp_rcount);
4291 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4292 const_tree type ATTRIBUTE_UNUSED)
4294 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4295 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4298 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4299 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4301 pcum->aapcs_vfp_reg_alloc = mask << regno;
4302 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4305 int rcount = pcum->aapcs_vfp_rcount;
4307 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4311 /* Avoid using unsupported vector modes. */
4312 if (rmode == V2SImode)
4314 else if (rmode == V4SImode)
4321 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4322 for (i = 0; i < rcount; i++)
4324 rtx tmp = gen_rtx_REG (rmode,
4325 FIRST_VFP_REGNUM + regno + i * rshift);
4326 tmp = gen_rtx_EXPR_LIST
4328 GEN_INT (i * GET_MODE_SIZE (rmode)));
4329 XVECEXP (par, 0, i) = tmp;
4332 pcum->aapcs_reg = par;
4335 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
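/* A sketch of the mask arithmetic above, with hypothetical values:
   for an argument made of two doubles, aapcs_vfp_rmode is DFmode,
   so shift = 8/4 = 2 and mask = (1 << (2 * 2)) - 1 = 0xf, i.e. four
   consecutive single-precision slots. If bits 0-3 of
   aapcs_vfp_regs_free are still set, the loop settles on regno 0
   and the argument lands in d0-d1. */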
4342 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4343 enum machine_mode mode,
4344 const_tree type ATTRIBUTE_UNUSED)
4346 if (!use_vfp_abi (pcs_variant, false))
4349 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4352 enum machine_mode ag_mode;
4357 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4362 if (ag_mode == V2SImode)
4364 else if (ag_mode == V4SImode)
4370 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4371 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4372 for (i = 0; i < count; i++)
4374 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4375 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4376 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4377 XVECEXP (par, 0, i) = tmp;
4383 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4387 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
4388 enum machine_mode mode ATTRIBUTE_UNUSED,
4389 const_tree type ATTRIBUTE_UNUSED)
4391 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4392 pcum->aapcs_vfp_reg_alloc = 0;
4396 #define AAPCS_CP(X) \
4398 aapcs_ ## X ## _cum_init, \
4399 aapcs_ ## X ## _is_call_candidate, \
4400 aapcs_ ## X ## _allocate, \
4401 aapcs_ ## X ## _is_return_candidate, \
4402 aapcs_ ## X ## _allocate_return_reg, \
4403 aapcs_ ## X ## _advance \
4406 /* Table of co-processors that can be used to pass arguments in
4407 registers. Ideally no argument should be a candidate for more than
4408 one co-processor table entry, but the table is processed in order
4409 and stops after the first match. If that entry then fails to put
4410 the argument into a co-processor register, the argument will go on the stack. */
4414 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4415 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4417 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4418 BLKmode) is a candidate for this co-processor's registers; this
4419 function should ignore any position-dependent state in
4420 CUMULATIVE_ARGS and only use call-type dependent information. */
4421 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4423 /* Return true if the argument does get a co-processor register; it
4424 should set aapcs_reg to an RTX of the register allocated as is
4425 required for a return from FUNCTION_ARG. */
4426 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4428 /* Return true if a result of mode MODE (or type TYPE if MODE is
4429 BLKmode) can be returned in this co-processor's registers. */
4430 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4432 /* Allocate and return an RTX element to hold the return type of a
4433 call; this routine must not fail and will only be called if
4434 is_return_candidate returned true with the same parameters. */
4435 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4437 /* Finish processing this argument and prepare to start processing the next argument. */
4439 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4440 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
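/* Dispatch through this table is uniform: each slot (created with
   AAPCS_CP, e.g. AAPCS_CP(vfp) for the aapcs_vfp_* handlers above)
   is probed in order, so aapcs_select_call_coproc below effectively
   asks aapcs_vfp_is_call_candidate first. */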
4448 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4453 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4454 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4461 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4463 /* We aren't passed a decl, so we can't check that a call is local.
4464 However, it isn't clear that that would be a win anyway, since it
4465 might limit some tail-calling opportunities. */
4466 enum arm_pcs pcs_variant;
4470 const_tree fndecl = NULL_TREE;
4472 if (TREE_CODE (fntype) == FUNCTION_DECL)
4475 fntype = TREE_TYPE (fntype);
4478 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4481 pcs_variant = arm_pcs_default;
4483 if (pcs_variant != ARM_PCS_AAPCS)
4487 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4488 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4497 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4500 /* We aren't passed a decl, so we can't check that a call is local.
4501 However, it isn't clear that that would be a win anyway, since it
4502 might limit some tail-calling opportunities. */
4503 enum arm_pcs pcs_variant;
4504 int unsignedp ATTRIBUTE_UNUSED;
4508 const_tree fndecl = NULL_TREE;
4510 if (TREE_CODE (fntype) == FUNCTION_DECL)
4513 fntype = TREE_TYPE (fntype);
4516 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4519 pcs_variant = arm_pcs_default;
4521 /* Promote integer types. */
4522 if (type && INTEGRAL_TYPE_P (type))
4523 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4525 if (pcs_variant != ARM_PCS_AAPCS)
4529 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4530 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4532 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4536 /* Promotes small structs returned in a register to full-word size
4537 for big-endian AAPCS. */
4538 if (type && arm_return_in_msb (type))
4540 HOST_WIDE_INT size = int_size_in_bytes (type);
4541 if (size % UNITS_PER_WORD != 0)
4543 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4544 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4548 return gen_rtx_REG (mode, R0_REGNUM);
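/* Example of the big-endian promotion above (hypothetical struct):
   a 3-byte struct returned in registers has its size rounded up to
   4 bytes and comes back as SImode in r0, leaving the data in the
   most significant end as arm_return_in_msb requires. */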
4552 aapcs_libcall_value (enum machine_mode mode)
4554 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4555 && GET_MODE_SIZE (mode) <= 4)
4558 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4561 /* Lay out a function argument using the AAPCS rules. The rule
4562 numbers referred to here are those in the AAPCS. */
4564 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4565 const_tree type, bool named)
4570 /* We only need to do this once per argument. */
4571 if (pcum->aapcs_arg_processed)
4574 pcum->aapcs_arg_processed = true;
4576 /* Special case: if named is false then we are handling an incoming
4577 anonymous argument which is on the stack. */
4581 /* Is this a potential co-processor register candidate? */
4582 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4584 int slot = aapcs_select_call_coproc (pcum, mode, type);
4585 pcum->aapcs_cprc_slot = slot;
4587 /* We don't have to apply any of the rules from part B of the
4588 preparation phase; these are handled elsewhere in the compiler. */
4593 /* A Co-processor register candidate goes either in its own
4594 class of registers or on the stack. */
4595 if (!pcum->aapcs_cprc_failed[slot])
4597 /* C1.cp - Try to allocate the argument to co-processor registers. */
4599 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4602 /* C2.cp - Put the argument on the stack and note that we
4603 can't assign any more candidates in this slot. We also
4604 need to note that we have allocated stack space, so that
4605 we won't later try to split a non-cprc candidate between
4606 core registers and the stack. */
4607 pcum->aapcs_cprc_failed[slot] = true;
4608 pcum->can_split = false;
4611 /* We didn't get a register, so this argument goes on the stack. */
4613 gcc_assert (pcum->can_split == false);
4618 /* C3 - For double-word aligned arguments, round the NCRN up to the
4619 next even number. */
4620 ncrn = pcum->aapcs_ncrn;
4621 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4624 nregs = ARM_NUM_REGS2(mode, type);
4626 /* Sigh, this test should really assert that nregs > 0, but a GCC
4627 extension allows empty structs and then gives them empty size; it
4628 then allows such a structure to be passed by value. For some of
4629 the code below we have to pretend that such an argument has
4630 non-zero size so that we 'locate' it correctly either in
4631 registers or on the stack. */
4632 gcc_assert (nregs >= 0);
4634 nregs2 = nregs ? nregs : 1;
4636 /* C4 - Argument fits entirely in core registers. */
4637 if (ncrn + nregs2 <= NUM_ARG_REGS)
4639 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4640 pcum->aapcs_next_ncrn = ncrn + nregs;
4644 /* C5 - Some core registers left and there are no arguments already
4645 on the stack: split this argument between the remaining core
4646 registers and the stack. */
4647 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4649 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4650 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4651 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4655 /* C6 - NCRN is set to 4. */
4656 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4658 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
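/* Worked example of rules C3-C8 for a hypothetical call
   f (int a, double b, int c) under the base AAPCS: 'a' takes r0
   (NCRN 0 -> 1); 'b' needs doubleword alignment, so C3 rounds NCRN
   up to 2 and C4 assigns r2-r3; 'c' then sees NCRN == 4, neither C4
   nor C5 applies, and C7/C8 place it on the stack. */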
4662 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4663 for a call to a function whose data type is FNTYPE.
4664 For a library call, FNTYPE is NULL. */
4666 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4668 tree fndecl ATTRIBUTE_UNUSED)
4670 /* Long call handling. */
4672 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4674 pcum->pcs_variant = arm_pcs_default;
4676 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4678 if (arm_libcall_uses_aapcs_base (libname))
4679 pcum->pcs_variant = ARM_PCS_AAPCS;
4681 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4682 pcum->aapcs_reg = NULL_RTX;
4683 pcum->aapcs_partial = 0;
4684 pcum->aapcs_arg_processed = false;
4685 pcum->aapcs_cprc_slot = -1;
4686 pcum->can_split = true;
4688 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4692 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4694 pcum->aapcs_cprc_failed[i] = false;
4695 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4703 /* On the ARM, the offset starts at 0. */
4705 pcum->iwmmxt_nregs = 0;
4706 pcum->can_split = true;
4708 /* Varargs vectors are treated the same as long long.
4709 named_count avoids having to change the way arm handles 'named'. */
4710 pcum->named_count = 0;
4713 if (TARGET_REALLY_IWMMXT && fntype)
4717 for (fn_arg = TYPE_ARG_TYPES (fntype);
4719 fn_arg = TREE_CHAIN (fn_arg))
4720 pcum->named_count += 1;
4722 if (! pcum->named_count)
4723 pcum->named_count = INT_MAX;
4728 /* Return true if mode/type need doubleword alignment. */
4730 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4732 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4733 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
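/* E.g. DImode and DFmode (64-bit alignment, greater than the 32-bit
   PARM_BOUNDARY) need doubleword alignment, as does a type declared
   with __attribute__ ((aligned (8))); plain SImode does not. */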
4737 /* Determine where to put an argument to a function.
4738 Value is zero to push the argument on the stack,
4739 or a hard register in which to store the argument.
4741 MODE is the argument's machine mode.
4742 TYPE is the data type of the argument (as a tree).
4743 This is null for libcalls where that information may
4745 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4746 the preceding args and about the function being called.
4747 NAMED is nonzero if this argument is a named parameter
4748 (otherwise it is an extra parameter matching an ellipsis).
4750 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4751 other arguments are passed on the stack. If (NAMED == 0) (which happens
4752 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4753 defined), say it is passed on the stack (function_prologue will
4754 indeed make it pass on the stack if necessary). */
4757 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4758 const_tree type, bool named)
4760 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4763 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4764 a call insn (op3 of a call_value insn). */
4765 if (mode == VOIDmode)
4768 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4770 aapcs_layout_arg (pcum, mode, type, named);
4771 return pcum->aapcs_reg;
4774 /* Varargs vectors are treated the same as long long.
4775 named_count avoids having to change the way arm handles 'named'. */
4776 if (TARGET_IWMMXT_ABI
4777 && arm_vector_mode_supported_p (mode)
4778 && pcum->named_count > pcum->nargs + 1)
4780 if (pcum->iwmmxt_nregs <= 9)
4781 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4784 pcum->can_split = false;
4789 /* Put doubleword aligned quantities in even register pairs. */
4791 && ARM_DOUBLEWORD_ALIGN
4792 && arm_needs_doubleword_align (mode, type))
4795 /* Only allow splitting an arg between regs and memory if all preceding
4796 args were allocated to regs. For args passed by reference we only count
4797 the reference pointer. */
4798 if (pcum->can_split)
4801 nregs = ARM_NUM_REGS2 (mode, type);
4803 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4806 return gen_rtx_REG (mode, pcum->nregs);
4810 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4812 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4813 ? DOUBLEWORD_ALIGNMENT
4818 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4819 tree type, bool named)
4821 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4822 int nregs = pcum->nregs;
4824 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4826 aapcs_layout_arg (pcum, mode, type, named);
4827 return pcum->aapcs_partial;
4830 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4833 if (NUM_ARG_REGS > nregs
4834 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4836 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4841 /* Update the data in PCUM to advance over an argument
4842 of mode MODE and data type TYPE.
4843 (TYPE is null for libcalls where that information may not be available.) */
4846 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4847 const_tree type, bool named)
4849 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4851 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4853 aapcs_layout_arg (pcum, mode, type, named);
4855 if (pcum->aapcs_cprc_slot >= 0)
4857 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4859 pcum->aapcs_cprc_slot = -1;
4862 /* Generic stuff. */
4863 pcum->aapcs_arg_processed = false;
4864 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4865 pcum->aapcs_reg = NULL_RTX;
4866 pcum->aapcs_partial = 0;
4871 if (arm_vector_mode_supported_p (mode)
4872 && pcum->named_count > pcum->nargs
4873 && TARGET_IWMMXT_ABI)
4874 pcum->iwmmxt_nregs += 1;
4876 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4880 /* Variable sized types are passed by reference. This is a GCC
4881 extension to the ARM ABI. */
4884 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4885 enum machine_mode mode ATTRIBUTE_UNUSED,
4886 const_tree type, bool named ATTRIBUTE_UNUSED)
4888 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
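/* So a hypothetical parameter of variably modified type, say
   'char buf[n]' for a run-time 'n', has a TYPE_SIZE that is not an
   INTEGER_CST and is therefore passed as a pointer. */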
4891 /* Encode the current state of the #pragma [no_]long_calls. */
4894 OFF, /* No #pragma [no_]long_calls is in effect. */
4895 LONG, /* #pragma long_calls is in effect. */
4896 SHORT /* #pragma no_long_calls is in effect. */
4899 static arm_pragma_enum arm_pragma_long_calls = OFF;
4902 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4904 arm_pragma_long_calls = LONG;
4908 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4910 arm_pragma_long_calls = SHORT;
4914 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4916 arm_pragma_long_calls = OFF;
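/* Illustrative source-level usage (not from this file):

       #pragma long_calls
       void far_away (void);     -- type gets the long_call attribute
       #pragma no_long_calls
       void nearby (void);       -- type gets short_call
       #pragma long_calls_off
                                 -- back to the command-line default

   The recorded state is applied to each new function type by
   arm_set_default_type_attributes below. */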
4919 /* Handle an attribute requiring a FUNCTION_DECL;
4920 arguments as in struct attribute_spec.handler. */
4922 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4923 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4925 if (TREE_CODE (*node) != FUNCTION_DECL)
4927 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4929 *no_add_attrs = true;
4935 /* Handle an "interrupt" or "isr" attribute;
4936 arguments as in struct attribute_spec.handler. */
4938 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4943 if (TREE_CODE (*node) != FUNCTION_DECL)
4945 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4947 *no_add_attrs = true;
4949 /* FIXME: the argument if any is checked for type attributes;
4950 should it be checked for decl ones? */
4954 if (TREE_CODE (*node) == FUNCTION_TYPE
4955 || TREE_CODE (*node) == METHOD_TYPE)
4957 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4959 warning (OPT_Wattributes, "%qE attribute ignored",
4961 *no_add_attrs = true;
4964 else if (TREE_CODE (*node) == POINTER_TYPE
4965 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4966 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4967 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4969 *node = build_variant_type_copy (*node);
4970 TREE_TYPE (*node) = build_type_attribute_variant
4972 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4973 *no_add_attrs = true;
4977 /* Possibly pass this attribute on from the type to a decl. */
4978 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4979 | (int) ATTR_FLAG_FUNCTION_NEXT
4980 | (int) ATTR_FLAG_ARRAY_NEXT))
4982 *no_add_attrs = true;
4983 return tree_cons (name, args, NULL_TREE);
4987 warning (OPT_Wattributes, "%qE attribute ignored",
4996 /* Handle a "pcs" attribute; arguments as in struct
4997 attribute_spec.handler. */
4999 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5000 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5002 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5004 warning (OPT_Wattributes, "%qE attribute ignored", name);
5005 *no_add_attrs = true;
5010 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5011 /* Handle the "notshared" attribute. This attribute is another way of
5012 requesting hidden visibility. ARM's compiler supports
5013 "__declspec(notshared)"; we support the same thing via an attribute. */
5017 arm_handle_notshared_attribute (tree *node,
5018 tree name ATTRIBUTE_UNUSED,
5019 tree args ATTRIBUTE_UNUSED,
5020 int flags ATTRIBUTE_UNUSED,
5023 tree decl = TYPE_NAME (*node);
5027 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5028 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5029 *no_add_attrs = false;
5035 /* Return 0 if the attributes for two types are incompatible, 1 if they
5036 are compatible, and 2 if they are nearly compatible (which causes a
5037 warning to be generated). */
5039 arm_comp_type_attributes (const_tree type1, const_tree type2)
5043 /* Check for mismatch of non-default calling convention. */
5044 if (TREE_CODE (type1) != FUNCTION_TYPE)
5047 /* Check for mismatched call attributes. */
5048 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5049 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5050 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5051 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5053 /* Only bother to check if an attribute is defined. */
5054 if (l1 | l2 | s1 | s2)
5056 /* If one type has an attribute, the other must have the same attribute. */
5057 if ((l1 != l2) || (s1 != s2))
5060 /* Disallow mixed attributes. */
5061 if ((l1 & s2) || (l2 & s1))
5065 /* Check for mismatched ISR attribute. */
5066 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5068 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5069 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5071 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5078 /* Assigns default attributes to newly defined type. This is used to
5079 set short_call/long_call attributes for function types of
5080 functions defined inside corresponding #pragma scopes. */
5082 arm_set_default_type_attributes (tree type)
5084 /* Add __attribute__ ((long_call)) to all functions when inside
5085 #pragma long_calls, or __attribute__ ((short_call)) when inside
5086 #pragma no_long_calls. */
5087 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5089 tree type_attr_list, attr_name;
5090 type_attr_list = TYPE_ATTRIBUTES (type);
5092 if (arm_pragma_long_calls == LONG)
5093 attr_name = get_identifier ("long_call");
5094 else if (arm_pragma_long_calls == SHORT)
5095 attr_name = get_identifier ("short_call");
5099 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5100 TYPE_ATTRIBUTES (type) = type_attr_list;
5104 /* Return true if DECL is known to be linked into section SECTION. */
5107 arm_function_in_section_p (tree decl, section *section)
5109 /* We can only be certain about functions defined in the same
5110 compilation unit. */
5111 if (!TREE_STATIC (decl))
5114 /* Make sure that SYMBOL always binds to the definition in this
5115 compilation unit. */
5116 if (!targetm.binds_local_p (decl))
5119 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5120 if (!DECL_SECTION_NAME (decl))
5122 /* Make sure that we will not create a unique section for DECL. */
5123 if (flag_function_sections || DECL_ONE_ONLY (decl))
5127 return function_section (decl) == section;
5130 /* Return nonzero if a 32-bit "long_call" should be generated for
5131 a call from the current function to DECL. We generate a long_call
5134 a. has an __attribute__ ((long_call))
5135 or b. is within the scope of a #pragma long_calls
5136 or c. the -mlong-calls command line switch has been specified
5138 However we do not generate a long call if the function:
5140 d. has an __attribute__ ((short_call))
5141 or e. is inside the scope of a #pragma no_long_calls
5142 or f. is defined in the same section as the current function. */
5145 arm_is_long_call_p (tree decl)
5150 return TARGET_LONG_CALLS;
5152 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5153 if (lookup_attribute ("short_call", attrs))
5156 /* For "f", be conservative, and only cater for cases in which the
5157 whole of the current function is placed in the same section. */
5158 if (!flag_reorder_blocks_and_partition
5159 && TREE_CODE (decl) == FUNCTION_DECL
5160 && arm_function_in_section_p (decl, current_function_section ()))
5163 if (lookup_attribute ("long_call", attrs))
5166 return TARGET_LONG_CALLS;
5169 /* Return nonzero if it is ok to make a tail-call to DECL. */
5171 arm_function_ok_for_sibcall (tree decl, tree exp)
5173 unsigned long func_type;
5175 if (cfun->machine->sibcall_blocked)
5178 /* Never tailcall something for which we have no decl, or if we
5179 are generating code for Thumb-1. */
5180 if (decl == NULL || TARGET_THUMB1)
5183 /* The PIC register is live on entry to VxWorks PLT entries, so we
5184 must make the call before restoring the PIC register. */
5185 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5188 /* Cannot tail-call to long calls, since these are out of range of
5189 a branch instruction. */
5190 if (arm_is_long_call_p (decl))
5193 /* If we are interworking and the function is not declared static
5194 then we can't tail-call it unless we know that it exists in this
5195 compilation unit (since it might be a Thumb routine). */
5196 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5199 func_type = arm_current_func_type ();
5200 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5201 if (IS_INTERRUPT (func_type))
5204 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5206 /* Check that the return value locations are the same. For
5207 example that we aren't returning a value from the sibling in
5208 a VFP register but then need to transfer it to a core register. */
5212 a = arm_function_value (TREE_TYPE (exp), decl, false);
5213 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5215 if (!rtx_equal_p (a, b))
5219 /* Never tailcall if function may be called with a misaligned SP. */
5220 if (IS_STACKALIGN (func_type))
5223 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5224 references should become a NOP. Don't convert such calls into sibling calls. */
5226 if (TARGET_AAPCS_BASED
5227 && arm_abi == ARM_ABI_AAPCS
5228 && DECL_WEAK (decl))
5231 /* Everything else is ok. */
5236 /* Addressing mode support functions. */
5238 /* Return nonzero if X is a legitimate immediate operand when compiling
5239 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5241 legitimate_pic_operand_p (rtx x)
5243 if (GET_CODE (x) == SYMBOL_REF
5244 || (GET_CODE (x) == CONST
5245 && GET_CODE (XEXP (x, 0)) == PLUS
5246 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5252 /* Record that the current function needs a PIC register. Initialize
5253 cfun->machine->pic_reg if we have not already done so. */
5256 require_pic_register (void)
5258 /* A lot of the logic here is made obscure by the fact that this
5259 routine gets called as part of the rtx cost estimation process.
5260 We don't want those calls to affect any assumptions about the real
5261 function; and further, we can't call entry_of_function() until we
5262 start the real expansion process. */
5263 if (!crtl->uses_pic_offset_table)
5265 gcc_assert (can_create_pseudo_p ());
5266 if (arm_pic_register != INVALID_REGNUM)
5268 if (!cfun->machine->pic_reg)
5269 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5271 /* Play games to avoid marking the function as needing pic
5272 if we are being called as part of the cost-estimation process. */
5274 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5275 crtl->uses_pic_offset_table = 1;
5281 if (!cfun->machine->pic_reg)
5282 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5284 /* Play games to avoid marking the function as needing pic
5285 if we are being called as part of the cost-estimation process. */
5287 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5289 crtl->uses_pic_offset_table = 1;
5292 arm_load_pic_register (0UL);
5297 for (insn = seq; insn; insn = NEXT_INSN (insn))
5299 INSN_LOCATOR (insn) = prologue_locator;
5301 /* We can be called during expansion of PHI nodes, where
5302 we can't yet emit instructions directly in the final
5303 insn stream. Queue the insns on the entry edge; they will
5304 be committed after everything else is expanded. */
5305 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5312 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5314 if (GET_CODE (orig) == SYMBOL_REF
5315 || GET_CODE (orig) == LABEL_REF)
5321 gcc_assert (can_create_pseudo_p ());
5322 reg = gen_reg_rtx (Pmode);
5325 /* VxWorks does not impose a fixed gap between segments; the run-time
5326 gap can be different from the object-file gap. We therefore can't
5327 use GOTOFF unless we are absolutely sure that the symbol is in the
5328 same segment as the GOT. Unfortunately, the flexibility of linker
5329 scripts means that we can't be sure of that in general, so assume
5330 that GOTOFF is never valid on VxWorks. */
5331 if ((GET_CODE (orig) == LABEL_REF
5332 || (GET_CODE (orig) == SYMBOL_REF &&
5333 SYMBOL_REF_LOCAL_P (orig)))
5335 && !TARGET_VXWORKS_RTP)
5336 insn = arm_pic_static_addr (orig, reg);
5342 /* If this function doesn't have a pic register, create one now. */
5343 require_pic_register ();
5345 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5347 /* Make the MEM as close to a constant as possible. */
5348 mem = SET_SRC (pat);
5349 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5350 MEM_READONLY_P (mem) = 1;
5351 MEM_NOTRAP_P (mem) = 1;
5353 insn = emit_insn (pat);
5356 /* Put a REG_EQUAL note on this insn, so that later passes can optimize it. */
5358 set_unique_reg_note (insn, REG_EQUAL, orig);
5362 else if (GET_CODE (orig) == CONST)
5366 if (GET_CODE (XEXP (orig, 0)) == PLUS
5367 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5370 /* Handle the case where we have: const (UNSPEC_TLS). */
5371 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5372 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5375 /* Handle the case where we have:
5376 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
5378 if (GET_CODE (XEXP (orig, 0)) == PLUS
5379 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5380 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5382 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5388 gcc_assert (can_create_pseudo_p ());
5389 reg = gen_reg_rtx (Pmode);
5392 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5394 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5395 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5396 base == reg ? 0 : reg);
5398 if (GET_CODE (offset) == CONST_INT)
5400 /* The base register doesn't really matter, we only want to
5401 test the index for the appropriate mode. */
5402 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5404 gcc_assert (can_create_pseudo_p ());
5405 offset = force_reg (Pmode, offset);
5408 if (GET_CODE (offset) == CONST_INT)
5409 return plus_constant (Pmode, base, INTVAL (offset));
5412 if (GET_MODE_SIZE (mode) > 4
5413 && (GET_MODE_CLASS (mode) == MODE_INT
5414 || TARGET_SOFT_FLOAT))
5416 emit_insn (gen_addsi3 (reg, base, offset));
5420 return gen_rtx_PLUS (Pmode, base, offset);
5427 /* Find a spare register to use during the prolog of a function. */
5430 thumb_find_work_register (unsigned long pushed_regs_mask)
5434 /* Check the argument registers first as these are call-used. The
5435 register allocation order means that sometimes r3 might be used
5436 but earlier argument registers might not, so check them all. */
5437 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5438 if (!df_regs_ever_live_p (reg))
5441 /* Before going on to check the call-saved registers we can try a couple
5442 more ways of deducing that r3 is available. The first is when we are
5443 pushing anonymous arguments onto the stack and we have less than 4
5444 registers worth of fixed arguments(*). In this case r3 will be part of
5445 the variable argument list and so we can be sure that it will be
5446 pushed right at the start of the function. Hence it will be available
5447 for the rest of the prologue.
5448 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5449 if (cfun->machine->uses_anonymous_args
5450 && crtl->args.pretend_args_size > 0)
5451 return LAST_ARG_REGNUM;
5453 /* The other case is when we have fixed arguments but less than 4 registers
5454 worth. In this case r3 might be used in the body of the function, but
5455 it is not being used to convey an argument into the function. In theory
5456 we could just check crtl->args.size to see how many bytes are
5457 being passed in argument registers, but it seems that it is unreliable.
5458 Sometimes it will have the value 0 when in fact arguments are being
5459 passed. (See testcase execute/20021111-1.c for an example.) So we
5460 check the args_info.nregs field as well. The problem with this field is
5461 that it makes no allowances for arguments that are passed to the
5462 function but which are not used. Hence we could miss an opportunity
5463 when a function has an unused argument in r3. But it is better to be
5464 safe than to be sorry. */
5465 if (! cfun->machine->uses_anonymous_args
5466 && crtl->args.size >= 0
5467 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5468 && crtl->args.info.nregs < 4)
5469 return LAST_ARG_REGNUM;
5471 /* Otherwise look for a call-saved register that is going to be pushed. */
5472 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5473 if (pushed_regs_mask & (1 << reg))
5478 /* Thumb-2 can use high regs. */
5479 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5480 if (pushed_regs_mask & (1 << reg))
5483 /* Something went wrong - thumb_compute_save_reg_mask()
5484 should have arranged for a suitable register to be pushed. */
5488 static GTY(()) int pic_labelno;
5490 /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. */
5494 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5496 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5498 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5501 gcc_assert (flag_pic);
5503 pic_reg = cfun->machine->pic_reg;
5504 if (TARGET_VXWORKS_RTP)
5506 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5507 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5508 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5510 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5512 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5513 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5517 /* We use an UNSPEC rather than a LABEL_REF because this label
5518 never appears in the code stream. */
5520 labelno = GEN_INT (pic_labelno++);
5521 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5522 l1 = gen_rtx_CONST (VOIDmode, l1);
5524 /* On the ARM the PC register contains 'dot + 8' at the time of the
5525 addition; on the Thumb it is 'dot + 4'. */
5526 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5527 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5529 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5533 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5535 else /* TARGET_THUMB1 */
5537 if (arm_pic_register != INVALID_REGNUM
5538 && REGNO (pic_reg) > LAST_LO_REGNUM)
5540 /* We will have pushed the pic register, so we should always be
5541 able to find a work register. */
5542 pic_tmp = gen_rtx_REG (SImode,
5543 thumb_find_work_register (saved_regs));
5544 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5545 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5546 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5549 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5553 /* Need to emit this whether or not we obey regdecls,
5554 since setjmp/longjmp can cause life info to screw up. */
5558 /* Generate code to load the address of a static var when flag_pic is set. */
5560 arm_pic_static_addr (rtx orig, rtx reg)
5562 rtx l1, labelno, offset_rtx, insn;
5564 gcc_assert (flag_pic);
5566 /* We use an UNSPEC rather than a LABEL_REF because this label
5567 never appears in the code stream. */
5568 labelno = GEN_INT (pic_labelno++);
5569 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5570 l1 = gen_rtx_CONST (VOIDmode, l1);
5572 /* On the ARM the PC register contains 'dot + 8' at the time of the
5573 addition; on the Thumb it is 'dot + 4'. */
5574 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5575 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5576 UNSPEC_SYMBOL_OFFSET);
5577 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5579 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5583 /* Return nonzero if X is valid as an ARM state addressing register. */
5585 arm_address_register_rtx_p (rtx x, int strict_p)
5589 if (GET_CODE (x) != REG)
5595 return ARM_REGNO_OK_FOR_BASE_P (regno);
5597 return (regno <= LAST_ARM_REGNUM
5598 || regno >= FIRST_PSEUDO_REGISTER
5599 || regno == FRAME_POINTER_REGNUM
5600 || regno == ARG_POINTER_REGNUM);
5603 /* Return TRUE if this rtx is the difference of a symbol and a label,
5604 and will reduce to a PC-relative relocation in the object file.
5605 Expressions like this can be left alone when generating PIC, rather
5606 than forced through the GOT. */
5608 pcrel_constant_p (rtx x)
5610 if (GET_CODE (x) == MINUS)
5611 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
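/* E.g. an rtx of the form (minus (symbol_ref "x") (label_ref L))
   qualifies: the assembler can resolve the difference PC-relatively,
   so no GOT indirection is needed even under -fpic. */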
5616 /* Return true if X will surely end up in an index register after the next splitting pass. */
5619 will_be_in_index_register (const_rtx x)
5621 /* arm.md: calculate_pic_address will split this into a register. */
5622 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5625 /* Return nonzero if X is a valid ARM state address operand. */
5627 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5631 enum rtx_code code = GET_CODE (x);
5633 if (arm_address_register_rtx_p (x, strict_p))
5636 use_ldrd = (TARGET_LDRD
5638 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5640 if (code == POST_INC || code == PRE_DEC
5641 || ((code == PRE_INC || code == POST_DEC)
5642 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5643 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5645 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5646 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5647 && GET_CODE (XEXP (x, 1)) == PLUS
5648 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5650 rtx addend = XEXP (XEXP (x, 1), 1);
5652 /* Don't allow ldrd post increment by register because it's hard
5653 to fixup invalid register choices. */
5655 && GET_CODE (x) == POST_MODIFY
5656 && GET_CODE (addend) == REG)
5659 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5660 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5663 /* After reload constants split into minipools will have addresses
5664 from a LABEL_REF. */
5665 else if (reload_completed
5666 && (code == LABEL_REF
5668 && GET_CODE (XEXP (x, 0)) == PLUS
5669 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5670 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5673 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5676 else if (code == PLUS)
5678 rtx xop0 = XEXP (x, 0);
5679 rtx xop1 = XEXP (x, 1);
5681 return ((arm_address_register_rtx_p (xop0, strict_p)
5682 && ((GET_CODE(xop1) == CONST_INT
5683 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5684 || (!strict_p && will_be_in_index_register (xop1))))
5685 || (arm_address_register_rtx_p (xop1, strict_p)
5686 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5690 /* Reload currently can't handle MINUS, so disable this for now. */
5691 else if (GET_CODE (x) == MINUS)
5693 rtx xop0 = XEXP (x, 0);
5694 rtx xop1 = XEXP (x, 1);
5696 return (arm_address_register_rtx_p (xop0, strict_p)
5697 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5701 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5702 && code == SYMBOL_REF
5703 && CONSTANT_POOL_ADDRESS_P (x)
5705 && symbol_mentioned_p (get_pool_constant (x))
5706 && ! pcrel_constant_p (get_pool_constant (x))))
5712 /* Return nonzero if X is a valid Thumb-2 address operand. */
5714 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5717 enum rtx_code code = GET_CODE (x);
5719 if (arm_address_register_rtx_p (x, strict_p))
5722 use_ldrd = (TARGET_LDRD
5724 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5726 if (code == POST_INC || code == PRE_DEC
5727 || ((code == PRE_INC || code == POST_DEC)
5728 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5729 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5731 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5732 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5733 && GET_CODE (XEXP (x, 1)) == PLUS
5734 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5736 /* Thumb-2 only has autoincrement by constant. */
5737 rtx addend = XEXP (XEXP (x, 1), 1);
5738 HOST_WIDE_INT offset;
5740 if (GET_CODE (addend) != CONST_INT)
5743 offset = INTVAL(addend);
5744 if (GET_MODE_SIZE (mode) <= 4)
5745 return (offset > -256 && offset < 256);
5747 return (use_ldrd && offset > -1024 && offset < 1024
5748 && (offset & 3) == 0);
5751 /* After reload constants split into minipools will have addresses
5752 from a LABEL_REF. */
5753 else if (reload_completed
5754 && (code == LABEL_REF
5756 && GET_CODE (XEXP (x, 0)) == PLUS
5757 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5758 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5761 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5764 else if (code == PLUS)
5766 rtx xop0 = XEXP (x, 0);
5767 rtx xop1 = XEXP (x, 1);
5769 return ((arm_address_register_rtx_p (xop0, strict_p)
5770 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5771 || (!strict_p && will_be_in_index_register (xop1))))
5772 || (arm_address_register_rtx_p (xop1, strict_p)
5773 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5776 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5777 && code == SYMBOL_REF
5778 && CONSTANT_POOL_ADDRESS_P (x)
5780 && symbol_mentioned_p (get_pool_constant (x))
5781 && ! pcrel_constant_p (get_pool_constant (x))))
5787 /* Return nonzero if INDEX is valid for an address index operand in ARM state. */
5790 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5793 HOST_WIDE_INT range;
5794 enum rtx_code code = GET_CODE (index);
5796 /* Standard coprocessor addressing modes. */
5797 if (TARGET_HARD_FLOAT
5799 && (mode == SFmode || mode == DFmode))
5800 return (code == CONST_INT && INTVAL (index) < 1024
5801 && INTVAL (index) > -1024
5802 && (INTVAL (index) & 3) == 0);
5804 /* For quad modes, we restrict the constant offset to be slightly less
5805 than what the instruction format permits. We do this because for
5806 quad mode moves, we will actually decompose them into two separate
5807 double-mode reads or writes. INDEX must therefore be a valid
5808 (double-mode) offset and so should INDEX+8. */
5809 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5810 return (code == CONST_INT
5811 && INTVAL (index) < 1016
5812 && INTVAL (index) > -1024
5813 && (INTVAL (index) & 3) == 0);
5815 /* We have no such constraint on double mode offsets, so we permit the
5816 full range of the instruction format. */
5817 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5818 return (code == CONST_INT
5819 && INTVAL (index) < 1024
5820 && INTVAL (index) > -1024
5821 && (INTVAL (index) & 3) == 0);
5823 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5824 return (code == CONST_INT
5825 && INTVAL (index) < 1024
5826 && INTVAL (index) > -1024
5827 && (INTVAL (index) & 3) == 0);
5829 if (arm_address_register_rtx_p (index, strict_p)
5830 && (GET_MODE_SIZE (mode) <= 4))
5833 if (mode == DImode || mode == DFmode)
5835 if (code == CONST_INT)
5837 HOST_WIDE_INT val = INTVAL (index);
5840 return val > -256 && val < 256;
5842 return val > -4096 && val < 4092;
5845 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5848 if (GET_MODE_SIZE (mode) <= 4
5852 || (mode == QImode && outer == SIGN_EXTEND))))
5856 rtx xiop0 = XEXP (index, 0);
5857 rtx xiop1 = XEXP (index, 1);
5859 return ((arm_address_register_rtx_p (xiop0, strict_p)
5860 && power_of_two_operand (xiop1, SImode))
5861 || (arm_address_register_rtx_p (xiop1, strict_p)
5862 && power_of_two_operand (xiop0, SImode)));
5864 else if (code == LSHIFTRT || code == ASHIFTRT
5865 || code == ASHIFT || code == ROTATERT)
5867 rtx op = XEXP (index, 1);
5869 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5870 && GET_CODE (op) == CONST_INT
5872 && INTVAL (op) <= 31);
5876 /* For ARM v4 we may be doing a sign-extend operation during the load. */
5882 || (outer == SIGN_EXTEND && mode == QImode))
5888 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5890 return (code == CONST_INT
5891 && INTVAL (index) < range
5892 && INTVAL (index) > -range);
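/* Examples accepted above for SImode (illustrative): a register
   scaled by a power of two, as in [r0, r1, lsl #2]; a shifted
   register whose shift count is a CONST_INT of at most 31; or an
   immediate strictly between -4096 and 4096, e.g. [r0, #4095].
   The elided arm_arch4 branch narrows the range for halfword and
   sign-extended byte accesses in upstream sources. */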
5895 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5896 index operand, i.e. 1, 2, 4 or 8. */
5898 thumb2_index_mul_operand (rtx op)
5902 if (GET_CODE(op) != CONST_INT)
5906 return (val == 1 || val == 2 || val == 4 || val == 8);
5909 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5911 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5913 enum rtx_code code = GET_CODE (index);
5915 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5916 /* Standard coprocessor addressing modes. */
5917 if (TARGET_HARD_FLOAT
5919 && (mode == SFmode || mode == DFmode))
5920 return (code == CONST_INT && INTVAL (index) < 1024
5921 /* Thumb-2 allows only > -256 index range for its core register
5922 load/stores. Since we allow SF/DF in core registers, we have
5923 to use the intersection between -256~4096 (core) and -1024~1024 (coprocessor). */
5925 && INTVAL (index) > -256
5926 && (INTVAL (index) & 3) == 0);
5928 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5930 /* For DImode assume values will usually live in core regs
5931 and only allow LDRD addressing modes. */
5932 if (!TARGET_LDRD || mode != DImode)
5933 return (code == CONST_INT
5934 && INTVAL (index) < 1024
5935 && INTVAL (index) > -1024
5936 && (INTVAL (index) & 3) == 0);
5939 /* For quad modes, we restrict the constant offset to be slightly less
5940 than what the instruction format permits. We do this because for
5941 quad mode moves, we will actually decompose them into two separate
5942 double-mode reads or writes. INDEX must therefore be a valid
5943 (double-mode) offset and so should INDEX+8. */
5944 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5945 return (code == CONST_INT
5946 && INTVAL (index) < 1016
5947 && INTVAL (index) > -1024
5948 && (INTVAL (index) & 3) == 0);
5950 /* We have no such constraint on double mode offsets, so we permit the
5951 full range of the instruction format. */
5952 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5953 return (code == CONST_INT
5954 && INTVAL (index) < 1024
5955 && INTVAL (index) > -1024
5956 && (INTVAL (index) & 3) == 0);
5958 if (arm_address_register_rtx_p (index, strict_p)
5959 && (GET_MODE_SIZE (mode) <= 4))
5962 if (mode == DImode || mode == DFmode)
5964 if (code == CONST_INT)
5966 HOST_WIDE_INT val = INTVAL (index);
5967 /* ??? Can we assume ldrd for thumb2? */
5968 /* Thumb-2 ldrd only has reg+const addressing modes. */
5969 /* ldrd supports offsets of +-1020.
5970 However the ldr fallback does not. */
5971 return val > -256 && val < 256 && (val & 3) == 0;
5979 rtx xiop0 = XEXP (index, 0);
5980 rtx xiop1 = XEXP (index, 1);
5982 return ((arm_address_register_rtx_p (xiop0, strict_p)
5983 && thumb2_index_mul_operand (xiop1))
5984 || (arm_address_register_rtx_p (xiop1, strict_p)
5985 && thumb2_index_mul_operand (xiop0)));
5987 else if (code == ASHIFT)
5989 rtx op = XEXP (index, 1);
5991 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5992 && GET_CODE (op) == CONST_INT
5994 && INTVAL (op) <= 3);
5997 return (code == CONST_INT
5998 && INTVAL (index) < 4096
5999 && INTVAL (index) > -256);
6002 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6004 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6008 if (GET_CODE (x) != REG)
6014 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6016 return (regno <= LAST_LO_REGNUM
6017 || regno > LAST_VIRTUAL_REGISTER
6018 || regno == FRAME_POINTER_REGNUM
6019 || (GET_MODE_SIZE (mode) >= 4
6020 && (regno == STACK_POINTER_REGNUM
6021 || regno >= FIRST_PSEUDO_REGISTER
6022 || x == hard_frame_pointer_rtx
6023 || x == arg_pointer_rtx)));
6026 /* Return nonzero if x is a legitimate index register. This is the case
6027 for any base register that can access a QImode object. */
6029 thumb1_index_register_rtx_p (rtx x, int strict_p)
6031 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6034 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6036 The AP may be eliminated to either the SP or the FP, so we use the
6037 least common denominator, e.g. SImode, and offsets from 0 to 64.
6039 ??? Verify whether the above is the right approach.
6041 ??? Also, the FP may be eliminated to the SP, so perhaps that
6042 needs special handling also.
6044 ??? Look at how the mips16 port solves this problem. It probably uses
6045 better ways to solve some of these problems.
6047 Although it is not incorrect, we don't accept QImode and HImode
6048 addresses based on the frame pointer or arg pointer until the
6049 reload pass starts. This is so that eliminating such addresses
6050 into stack based ones won't produce impossible code. */
6052 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6054 /* ??? Not clear if this is right. Experiment. */
6055 if (GET_MODE_SIZE (mode) < 4
6056 && !(reload_in_progress || reload_completed)
6057 && (reg_mentioned_p (frame_pointer_rtx, x)
6058 || reg_mentioned_p (arg_pointer_rtx, x)
6059 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6060 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6061 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6062 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6065 /* Accept any base register. SP only in SImode or larger. */
6066 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6069 /* This is PC relative data before arm_reorg runs. */
6070 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6071 && GET_CODE (x) == SYMBOL_REF
6072 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6075 /* This is PC relative data after arm_reorg runs. */
6076 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6078 && (GET_CODE (x) == LABEL_REF
6079 || (GET_CODE (x) == CONST
6080 && GET_CODE (XEXP (x, 0)) == PLUS
6081 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6082 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6085 /* Post-inc indexing only supported for SImode and larger. */
6086 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6087 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6090 else if (GET_CODE (x) == PLUS)
6092 /* REG+REG address can be any two index registers. */
6093 /* We disallow FRAME+REG addressing since we know that FRAME
6094 will be replaced with STACK, and SP relative addressing only
6095 permits SP+OFFSET. */
6096 if (GET_MODE_SIZE (mode) <= 4
6097 && XEXP (x, 0) != frame_pointer_rtx
6098 && XEXP (x, 1) != frame_pointer_rtx
6099 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6100 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6101 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6104 /* REG+const has 5-7 bit offset for non-SP registers. */
6105 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6106 || XEXP (x, 0) == arg_pointer_rtx)
6107 && GET_CODE (XEXP (x, 1)) == CONST_INT
6108 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6111 /* REG+const has 10-bit offset for SP, but only SImode and
6112 larger is supported. */
6113 /* ??? Should probably check for DI/DFmode overflow here
6114 just like GO_IF_LEGITIMATE_OFFSET does. */
6115 else if (GET_CODE (XEXP (x, 0)) == REG
6116 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6117 && GET_MODE_SIZE (mode) >= 4
6118 && GET_CODE (XEXP (x, 1)) == CONST_INT
6119 && INTVAL (XEXP (x, 1)) >= 0
6120 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6121 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6124 else if (GET_CODE (XEXP (x, 0)) == REG
6125 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6126 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6127 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6128 && REGNO (XEXP (x, 0))
6129 <= LAST_VIRTUAL_POINTER_REGISTER))
6130 && GET_MODE_SIZE (mode) >= 4
6131 && GET_CODE (XEXP (x, 1)) == CONST_INT
6132 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6136 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6137 && GET_MODE_SIZE (mode) == 4
6138 && GET_CODE (x) == SYMBOL_REF
6139 && CONSTANT_POOL_ADDRESS_P (x)
6141 && symbol_mentioned_p (get_pool_constant (x))
6142 && ! pcrel_constant_p (get_pool_constant (x))))
6148 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6149 instruction of mode MODE. */
6151 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6153 switch (GET_MODE_SIZE (mode))
6156 return val >= 0 && val < 32;
6159 return val >= 0 && val < 64 && (val & 1) == 0;
6163 && (val + GET_MODE_SIZE (mode)) <= 128
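/* A standalone restatement of the offset rule above (illustrative
   sketch with plain C types, not part of GCC): for the 1-, 2- and
   4-byte accesses handled here, the check is simply "non-negative
   5-bit offset, scaled by the access size". */
static int
thumb1_offset_ok_sketch (int size, long val)
{
  /* Offset must be non-negative, a multiple of the access size, and
     fit in 5 bits once scaled down. */
  return val >= 0 && val % size == 0 && (val / size) < 32;
}
/* e.g. thumb1_offset_ok_sketch (4, 124) == 1, but 126 fails the
   alignment test and 128 no longer fits the scaled 5-bit field. */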
6169 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6172 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6173 else if (TARGET_THUMB2)
6174 return thumb2_legitimate_address_p (mode, x, strict_p);
6175 else /* if (TARGET_THUMB1) */
6176 return thumb1_legitimate_address_p (mode, x, strict_p);
6179 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6181 Given an rtx X being reloaded into a reg required to be
6182 in class CLASS, return the class of reg to actually use.
6183 In general this is just CLASS, but for the Thumb core registers and
6184 immediate constants we prefer a LO_REGS class or a subset. */
6187 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6193 if (rclass == GENERAL_REGS
6194 || rclass == HI_REGS
6195 || rclass == NO_REGS
6196 || rclass == STACK_REG)
6203 /* Build the SYMBOL_REF for __tls_get_addr. */
6205 static GTY(()) rtx tls_get_addr_libfunc;
6208 get_tls_get_addr (void)
6210 if (!tls_get_addr_libfunc)
6211 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6212 return tls_get_addr_libfunc;
6216 arm_load_tp (rtx target)
6219 target = gen_reg_rtx (SImode);
6223 /* Can return in any reg. */
6224 emit_insn (gen_load_tp_hard (target));
6228 /* Always returned in r0. Immediately copy the result into a pseudo,
6229 otherwise other uses of r0 (e.g. setting up function arguments) may
6230 clobber the value. */
6234 emit_insn (gen_load_tp_soft ());
6236 tmp = gen_rtx_REG (SImode, 0);
6237 emit_move_insn (target, tmp);
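/* For reference (assumed from the usual machine descriptions, not
   spelled out here): load_tp_hard is a single CP15 read of the
   user-mode thread register, "mrc p15, 0, <target>, c13, c0, 3",
   while load_tp_soft calls the __aeabi_read_tp helper, which is why
   its result must be copied out of r0 above. */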
6243 load_tls_operand (rtx x, rtx reg)
6247 if (reg == NULL_RTX)
6248 reg = gen_reg_rtx (SImode);
6250 tmp = gen_rtx_CONST (SImode, x);
6252 emit_move_insn (reg, tmp);
6258 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6260 rtx insns, label, labelno, sum;
6262 gcc_assert (reloc != TLS_DESCSEQ);
6265 labelno = GEN_INT (pic_labelno++);
6266 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6267 label = gen_rtx_CONST (VOIDmode, label);
6269 sum = gen_rtx_UNSPEC (Pmode,
6270 gen_rtvec (4, x, GEN_INT (reloc), label,
6271 GEN_INT (TARGET_ARM ? 8 : 4)),
6273 reg = load_tls_operand (sum, reg);
6276 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6278 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6280 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6281 LCT_PURE, /* LCT_CONST? */
6282 Pmode, 1, reg, Pmode);
6284 insns = get_insns ();
6291 arm_tls_descseq_addr (rtx x, rtx reg)
6293 rtx labelno = GEN_INT (pic_labelno++);
6294 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6295 rtx sum = gen_rtx_UNSPEC (Pmode,
6296 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6297 gen_rtx_CONST (VOIDmode, label),
6298 GEN_INT (!TARGET_ARM)),
6300 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6302 emit_insn (gen_tlscall (x, labelno));
6304 reg = gen_reg_rtx (SImode);
6306 gcc_assert (REGNO (reg) != 0);
6308 emit_move_insn (reg, reg0);
6314 legitimize_tls_address (rtx x, rtx reg)
6316 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6317 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6321 case TLS_MODEL_GLOBAL_DYNAMIC:
6322 if (TARGET_GNU2_TLS)
6324 reg = arm_tls_descseq_addr (x, reg);
6326 tp = arm_load_tp (NULL_RTX);
6328 dest = gen_rtx_PLUS (Pmode, tp, reg);
6332 /* Original scheme */
6333 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6334 dest = gen_reg_rtx (Pmode);
6335 emit_libcall_block (insns, dest, ret, x);
6339 case TLS_MODEL_LOCAL_DYNAMIC:
6340 if (TARGET_GNU2_TLS)
6342 reg = arm_tls_descseq_addr (x, reg);
6344 tp = arm_load_tp (NULL_RTX);
6346 dest = gen_rtx_PLUS (Pmode, tp, reg);
6350 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6352 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6353 share the LDM result with other LD model accesses. */
6354 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6356 dest = gen_reg_rtx (Pmode);
6357 emit_libcall_block (insns, dest, ret, eqv);
6359 /* Load the addend. */
6360 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6361 GEN_INT (TLS_LDO32)),
6363 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6364 dest = gen_rtx_PLUS (Pmode, dest, addend);
6368 case TLS_MODEL_INITIAL_EXEC:
6369 labelno = GEN_INT (pic_labelno++);
6370 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6371 label = gen_rtx_CONST (VOIDmode, label);
6372 sum = gen_rtx_UNSPEC (Pmode,
6373 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6374 GEN_INT (TARGET_ARM ? 8 : 4)),
6376 reg = load_tls_operand (sum, reg);
6379 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6380 else if (TARGET_THUMB2)
6381 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6384 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6385 emit_move_insn (reg, gen_const_mem (SImode, reg));
6388 tp = arm_load_tp (NULL_RTX);
6390 return gen_rtx_PLUS (Pmode, tp, reg);
6392 case TLS_MODEL_LOCAL_EXEC:
6393 tp = arm_load_tp (NULL_RTX);
6395 reg = gen_rtx_UNSPEC (Pmode,
6396 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6398 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6400 return gen_rtx_PLUS (Pmode, tp, reg);
6407 /* Try machine-dependent ways of modifying an illegitimate address
6408 to be legitimate. If we find one, return the new, valid address. */
6410 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6414 /* TODO: legitimize_address for Thumb2. */
6417 return thumb_legitimize_address (x, orig_x, mode);
6420 if (arm_tls_symbol_p (x))
6421 return legitimize_tls_address (x, NULL_RTX);
6423 if (GET_CODE (x) == PLUS)
6425 rtx xop0 = XEXP (x, 0);
6426 rtx xop1 = XEXP (x, 1);
6428 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6429 xop0 = force_reg (SImode, xop0);
6431 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6432 xop1 = force_reg (SImode, xop1);
6434 if (ARM_BASE_REGISTER_RTX_P (xop0)
6435 && GET_CODE (xop1) == CONST_INT)
6437 HOST_WIDE_INT n, low_n;
6441 /* VFP addressing modes actually allow greater offsets, but for
6442 now we just stick with the lowest common denominator. */
6444 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6456 low_n = ((mode) == TImode ? 0
6457 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6461 base_reg = gen_reg_rtx (SImode);
6462 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6463 emit_move_insn (base_reg, val);
6464 x = plus_constant (Pmode, base_reg, low_n);
6466 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6467 x = gen_rtx_PLUS (SImode, xop0, xop1);
6470 /* XXX We don't allow MINUS any more -- see comment in
6471 arm_legitimate_address_outer_p (). */
6472 else if (GET_CODE (x) == MINUS)
6474 rtx xop0 = XEXP (x, 0);
6475 rtx xop1 = XEXP (x, 1);
6477 if (CONSTANT_P (xop0))
6478 xop0 = force_reg (SImode, xop0);
6480 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6481 xop1 = force_reg (SImode, xop1);
6483 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6484 x = gen_rtx_MINUS (SImode, xop0, xop1);
6487 /* Make sure to take full advantage of the pre-indexed addressing mode
6488 with absolute addresses which often allows for the base register to
6489 be factorized for multiple adjacent memory references, and it might
6490 even allow for the minipool to be avoided entirely. */
6491 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6494 HOST_WIDE_INT mask, base, index;
6497 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6498 use an 8-bit index. So let's use a 12-bit index for SImode only and
6499 hope that arm_gen_constant will enable ldrb to use more bits. */
6500 bits = (mode == SImode) ? 12 : 8;
6501 mask = (1 << bits) - 1;
6502 base = INTVAL (x) & ~mask;
6503 index = INTVAL (x) & mask;
6504 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6506 /* It'll most probably be more efficient to generate the base
6507 with more bits set and use a negative index instead. */
6511 base_reg = force_reg (SImode, GEN_INT (base));
6512 x = plus_constant (Pmode, base_reg, index);
6517 /* We need to find and carefully transform any SYMBOL and LABEL
6518 references, so go back to the original address expression. */
6519 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6521 if (new_x != orig_x)
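/* A minimal sketch of the CONST_INT split performed above (plain C,
   hypothetical helper; GCC itself works on rtx): keep a BITS-wide low
   index in the address and move the remaining bits into a base. */
static void
split_abs_address_sketch (long addr, int bits, long *base, long *index)
{
  long mask = (1L << bits) - 1;

  *base = addr & ~mask;
  *index = addr & mask;
  /* When the base needs many set bits to construct, the code above
     instead rounds the base up and uses a negative index, e.g.
     addr = 0xFFF004 is better as base 0x1000000 with index -0xFFC. */
}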
6529 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6530 to be legitimate. If we find one, return the new, valid address. */
6532 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6534 if (arm_tls_symbol_p (x))
6535 return legitimize_tls_address (x, NULL_RTX);
6537 if (GET_CODE (x) == PLUS
6538 && GET_CODE (XEXP (x, 1)) == CONST_INT
6539 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6540 || INTVAL (XEXP (x, 1)) < 0))
6542 rtx xop0 = XEXP (x, 0);
6543 rtx xop1 = XEXP (x, 1);
6544 HOST_WIDE_INT offset = INTVAL (xop1);
6546 /* Try to fold the offset into a biasing of the base register and
6547 then offsetting that. Don't do this when optimizing for space
6548 since it can cause too many CSEs. */
6549 if (optimize_size && offset >= 0
6550 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6552 HOST_WIDE_INT delta;
6555 delta = offset - (256 - GET_MODE_SIZE (mode));
6556 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6557 delta = 31 * GET_MODE_SIZE (mode);
6559 delta = offset & (~31 * GET_MODE_SIZE (mode));
6561 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6563 x = plus_constant (Pmode, xop0, delta);
6565 else if (offset < 0 && offset > -256)
6566 /* Small negative offsets are best done with a subtract before the
6567 dereference; forcing these into a register normally takes two
6568 insns. */
6569 x = force_operand (x, NULL_RTX);
6572 /* For the remaining cases, force the constant into a register. */
6573 xop1 = force_reg (SImode, xop1);
6574 x = gen_rtx_PLUS (SImode, xop0, xop1);
6577 else if (GET_CODE (x) == PLUS
6578 && s_register_operand (XEXP (x, 1), SImode)
6579 && !s_register_operand (XEXP (x, 0), SImode))
6581 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6583 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6588 /* We need to find and carefully transform any SYMBOL and LABEL
6589 references, so go back to the original address expression. */
6590 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6592 if (new_x != orig_x)
6600 arm_legitimize_reload_address (rtx *p,
6601 enum machine_mode mode,
6602 int opnum, int type,
6603 int ind_levels ATTRIBUTE_UNUSED)
6605 /* We must recognize output that we have already generated ourselves. */
6606 if (GET_CODE (*p) == PLUS
6607 && GET_CODE (XEXP (*p, 0)) == PLUS
6608 && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6609 && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6610 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6612 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6613 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6614 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6618 if (GET_CODE (*p) == PLUS
6619 && GET_CODE (XEXP (*p, 0)) == REG
6620 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6621 /* If the base register is equivalent to a constant, let the generic
6622 code handle it. Otherwise we will run into problems if a future
6623 reload pass decides to rematerialize the constant. */
6624 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6625 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6627 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6628 HOST_WIDE_INT low, high;
6630 /* Detect coprocessor load/stores. */
6631 bool coproc_p = ((TARGET_HARD_FLOAT
6633 && (mode == SFmode || mode == DFmode))
6634 || (TARGET_REALLY_IWMMXT
6635 && VALID_IWMMXT_REG_MODE (mode))
6637 && (VALID_NEON_DREG_MODE (mode)
6638 || VALID_NEON_QREG_MODE (mode))));
6640 /* For some conditions, bail out when the lower two bits are set (unaligned offset). */
6641 if ((val & 0x3) != 0
6642 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6644 /* For DI, and DF under soft-float: */
6645 || ((mode == DImode || mode == DFmode)
6646 /* Without ldrd, we use stm/ldm, which does not
6647 fare well with unaligned bits. */
6649 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6650 || TARGET_THUMB2))))
6653 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6654 where the (reg+high) part gets turned into a reload add insn,
6655 we try to decompose the index into high/low values that can often
6656 also lead to better reload CSE.
6658 ldr r0, [r2, #4100] // Offset too large
6659 ldr r1, [r2, #4104] // Offset too large
6661 is best reloaded as:
6663 add t1, r2, #4096
6664 ldr r0, [t1, #4]
6665 add t2, r2, #4096
6666 ldr r1, [t2, #8]
6667 which post-reload CSE can simplify in most cases to eliminate the
6668 second add instruction:
6670 add t1, r2, #4096
6671 ldr r0, [t1, #4]
6672 ldr r1, [t1, #8]
6673 The idea here is that we want to split out the bits of the constant
6674 as a mask, rather than as subtracting the maximum offset that the
6675 respective type of load/store used can handle.
6677 When encountering negative offsets, we can still use this trick even if
6678 the overall offset is positive; sometimes this may lead to an immediate
6679 that can be constructed with fewer instructions.
6681 ldr r0, [r2, #0x3FFFFC]
6683 This is best reloaded as:
6684 add t1, r2, #0x400000
6685 ldr r0, [t1, #-4]
6687 The trick for spotting this for a load insn with N bits of offset
6688 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6689 negative offset that is going to make bit N and all the bits below
6690 it become zero in the remainder part.
6692 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6693 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6694 used in most cases of ARM load/store instructions. */
6696 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6697 (((VAL) & ((1 << (N)) - 1)) \
6698 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6699 : 0)
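/* Worked example of the macro above: with VAL = 0x3FFFFC and N = 12,
     VAL & 0x1FFF    = 0x1FFC  (keep bit N and everything below it)
     0x1FFC ^ 0x1000 = 0x0FFC  (flip bit N)
     0x0FFC - 0x1000 = -4      (subtract it back out)
   so low = -4 and high = VAL - low = 0x400000, which is exactly the
   "#0x3FFFFC" reload shown in the comment above. */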
6702 if (coproc_p)
6703 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6705 /* NEON quad-word load/stores are made of two double-word accesses,
6706 so the valid index range is reduced by 8. Treat as 9-bit range if
6707 we go over it. */
6708 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6709 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6711 else if (GET_MODE_SIZE (mode) == 8)
6714 low = (TARGET_THUMB2
6715 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6716 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6718 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6719 to access doublewords. The supported load/store offsets are
6720 -8, -4, and 4, which we try to produce here. */
6721 low = ((val & 0xf) ^ 0x8) - 0x8;
6723 else if (GET_MODE_SIZE (mode) < 8)
6725 /* NEON element load/stores do not have an offset. */
6726 if (TARGET_NEON_FP16 && mode == HFmode)
6731 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6732 Try the wider 12-bit range first, and re-try if the result
6733 is out of range. */
6734 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6736 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6740 if (mode == HImode || mode == HFmode)
6743 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6746 /* The storehi/movhi_bytes fallbacks can use only
6747 [-4094,+4094] of the full ldrb/strb index range. */
6748 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6749 if (low == 4095 || low == -4095)
6754 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6760 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6761 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6762 - (unsigned HOST_WIDE_INT) 0x80000000);
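/* The XOR/subtract pair above is the standard branch-free idiom for
   sign-extending a 32-bit value held in a wider HOST_WIDE_INT, e.g.
   (0xFFFFFFFF ^ 0x80000000) - 0x80000000 == -1. */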
6763 /* Check for overflow or zero */
6764 if (low == 0 || high == 0 || (high + low != val))
6767 /* Reload the high part into a base reg; leave the low part
6768 in the mem. */
6769 *p = gen_rtx_PLUS (GET_MODE (*p),
6770 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6773 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6774 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6775 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6783 thumb_legitimize_reload_address (rtx *x_p,
6784 enum machine_mode mode,
6785 int opnum, int type,
6786 int ind_levels ATTRIBUTE_UNUSED)
6790 if (GET_CODE (x) == PLUS
6791 && GET_MODE_SIZE (mode) < 4
6792 && REG_P (XEXP (x, 0))
6793 && XEXP (x, 0) == stack_pointer_rtx
6794 && GET_CODE (XEXP (x, 1)) == CONST_INT
6795 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6800 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6801 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6805 /* If both registers are hi-regs, then it's better to reload the
6806 entire expression rather than each register individually. That
6807 only requires one reload register rather than two. */
6808 if (GET_CODE (x) == PLUS
6809 && REG_P (XEXP (x, 0))
6810 && REG_P (XEXP (x, 1))
6811 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6812 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6817 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6818 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6825 /* Test for various thread-local symbols. */
6827 /* Return TRUE if X is a thread-local symbol. */
6830 arm_tls_symbol_p (rtx x)
6832 if (! TARGET_HAVE_TLS)
6835 if (GET_CODE (x) != SYMBOL_REF)
6838 return SYMBOL_REF_TLS_MODEL (x) != 0;
6841 /* Helper for arm_tls_referenced_p. */
6844 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6846 if (GET_CODE (*x) == SYMBOL_REF)
6847 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6849 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6850 TLS offsets, not real symbol references. */
6851 if (GET_CODE (*x) == UNSPEC
6852 && XINT (*x, 1) == UNSPEC_TLS)
6858 /* Return TRUE if X contains any TLS symbol references. */
6861 arm_tls_referenced_p (rtx x)
6863 if (! TARGET_HAVE_TLS)
6866 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6869 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6871 On the ARM, allow any integer (invalid ones are removed later by insn
6872 patterns), nice doubles and symbol_refs which refer to the function's
6873 constant pool.
6875 When generating PIC, allow anything. */
6878 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6880 /* At present, we have no support for Neon structure constants, so forbid
6881 them here. It might be possible to handle simple cases like 0 and -1
6882 in future. */
6883 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6886 return flag_pic || !label_mentioned_p (x);
6890 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6892 return (GET_CODE (x) == CONST_INT
6893 || GET_CODE (x) == CONST_DOUBLE
6894 || CONSTANT_ADDRESS_P (x)
6899 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6901 return (!arm_cannot_force_const_mem (mode, x)
6903 ? arm_legitimate_constant_p_1 (mode, x)
6904 : thumb_legitimate_constant_p (mode, x)));
6907 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6910 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6914 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6916 split_const (x, &base, &offset);
6917 if (GET_CODE (base) == SYMBOL_REF
6918 && !offset_within_block_p (base, INTVAL (offset)))
6921 return arm_tls_referenced_p (x);
6924 #define REG_OR_SUBREG_REG(X) \
6925 (GET_CODE (X) == REG \
6926 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6928 #define REG_OR_SUBREG_RTX(X) \
6929 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6932 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6934 enum machine_mode mode = GET_MODE (x);
6948 return COSTS_N_INSNS (1);
6951 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6954 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6961 return COSTS_N_INSNS (2) + cycles;
6963 return COSTS_N_INSNS (1) + 16;
6966 return (COSTS_N_INSNS (1)
6967 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6968 + (GET_CODE (SET_DEST (x)) == MEM)));
6973 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6975 if (thumb_shiftable_const (INTVAL (x)))
6976 return COSTS_N_INSNS (2);
6977 return COSTS_N_INSNS (3);
6979 else if ((outer == PLUS || outer == COMPARE)
6980 && INTVAL (x) < 256 && INTVAL (x) > -256)
6982 else if ((outer == IOR || outer == XOR || outer == AND)
6983 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6984 return COSTS_N_INSNS (1);
6985 else if (outer == AND)
6988 /* This duplicates the tests in the andsi3 expander. */
6989 for (i = 9; i <= 31; i++)
6990 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6991 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6992 return COSTS_N_INSNS (2);
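/* In other words, the masks accepted here are 0x1ff, 0x3ff, ...,
   0x7fffffff and their bitwise inverses, all of which can be built
   with a two-instruction shift pair instead of a literal-pool load. */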
6994 else if (outer == ASHIFT || outer == ASHIFTRT
6995 || outer == LSHIFTRT)
6997 return COSTS_N_INSNS (2);
7003 return COSTS_N_INSNS (3);
7021 /* XXX another guess. */
7022 /* Memory costs quite a lot for the first word, but subsequent words
7023 load at the equivalent of a single insn each. */
7024 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7025 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7030 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7036 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7037 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7043 return total + COSTS_N_INSNS (1);
7045 /* Assume a two-shift sequence. Increase the cost slightly so
7046 we prefer actual shifts over an extend operation. */
7047 return total + 1 + COSTS_N_INSNS (2);
7055 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7057 enum machine_mode mode = GET_MODE (x);
7058 enum rtx_code subcode;
7060 enum rtx_code code = GET_CODE (x);
7066 /* Memory costs quite a lot for the first word, but subsequent words
7067 load at the equivalent of a single insn each. */
7068 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7075 if (TARGET_HARD_FLOAT && mode == SFmode)
7076 *total = COSTS_N_INSNS (2);
7077 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7078 *total = COSTS_N_INSNS (4);
7080 *total = COSTS_N_INSNS (20);
7084 if (GET_CODE (XEXP (x, 1)) == REG)
7085 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7086 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7087 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7093 *total += COSTS_N_INSNS (4);
7098 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7099 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7102 *total += COSTS_N_INSNS (3);
7106 *total += COSTS_N_INSNS (1);
7107 /* Increase the cost of complex shifts because they aren't any faster,
7108 and reduce dual issue opportunities. */
7109 if (arm_tune_cortex_a9
7110 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7118 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7119 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7120 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7122 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7126 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7127 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7129 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7136 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7138 if (TARGET_HARD_FLOAT
7140 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7142 *total = COSTS_N_INSNS (1);
7143 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7144 && arm_const_double_rtx (XEXP (x, 0)))
7146 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7150 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7151 && arm_const_double_rtx (XEXP (x, 1)))
7153 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7159 *total = COSTS_N_INSNS (20);
7163 *total = COSTS_N_INSNS (1);
7164 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7165 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7167 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7171 subcode = GET_CODE (XEXP (x, 1));
7172 if (subcode == ASHIFT || subcode == ASHIFTRT
7173 || subcode == LSHIFTRT
7174 || subcode == ROTATE || subcode == ROTATERT)
7176 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7177 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7181 /* A shift as a part of RSB costs no more than RSB itself. */
7182 if (GET_CODE (XEXP (x, 0)) == MULT
7183 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7185 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7186 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7191 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7193 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7194 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7198 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7199 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7201 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7202 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7203 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7204 *total += COSTS_N_INSNS (1);
7212 if (code == PLUS && arm_arch6 && mode == SImode
7213 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7214 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7216 *total = COSTS_N_INSNS (1);
7217 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7219 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7223 /* MLA: All arguments must be registers. We filter out
7224 multiplication by a power of two, so that we fall down into
7225 the code below. */
7226 if (GET_CODE (XEXP (x, 0)) == MULT
7227 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7229 /* The cost comes from the cost of the multiply. */
7233 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7235 if (TARGET_HARD_FLOAT
7237 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7239 *total = COSTS_N_INSNS (1);
7240 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7241 && arm_const_double_rtx (XEXP (x, 1)))
7243 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7250 *total = COSTS_N_INSNS (20);
7254 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7255 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7257 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7258 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7259 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7260 *total += COSTS_N_INSNS (1);
7266 case AND: case XOR: case IOR:
7268 /* Normally the frame registers will be spilt into reg+const during
7269 reload, so it is a bad idea to combine them with other instructions,
7270 since then they might not be moved outside of loops. As a compromise
7271 we allow integration with ops that have a constant as their second
7272 operand. */
7273 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7274 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7275 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7276 *total = COSTS_N_INSNS (1);
7280 *total += COSTS_N_INSNS (2);
7281 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7282 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7284 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7291 *total += COSTS_N_INSNS (1);
7292 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7293 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7295 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7298 subcode = GET_CODE (XEXP (x, 0));
7299 if (subcode == ASHIFT || subcode == ASHIFTRT
7300 || subcode == LSHIFTRT
7301 || subcode == ROTATE || subcode == ROTATERT)
7303 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7304 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7309 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7311 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7312 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7316 if (subcode == UMIN || subcode == UMAX
7317 || subcode == SMIN || subcode == SMAX)
7319 *total = COSTS_N_INSNS (3);
7326 /* This should have been handled by the CPU specific routines. */
7330 if (arm_arch3m && mode == SImode
7331 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7332 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7333 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7334 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7335 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7336 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7338 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7341 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7345 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7347 if (TARGET_HARD_FLOAT
7349 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7351 *total = COSTS_N_INSNS (1);
7354 *total = COSTS_N_INSNS (2);
7360 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7361 if (mode == SImode && code == NOT)
7363 subcode = GET_CODE (XEXP (x, 0));
7364 if (subcode == ASHIFT || subcode == ASHIFTRT
7365 || subcode == LSHIFTRT
7366 || subcode == ROTATE || subcode == ROTATERT
7368 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7370 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7371 /* Register shifts cost an extra cycle. */
7372 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7373 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7382 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7384 *total = COSTS_N_INSNS (4);
7388 operand = XEXP (x, 0);
7390 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7391 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7392 && GET_CODE (XEXP (operand, 0)) == REG
7393 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7394 *total += COSTS_N_INSNS (1);
7395 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7396 + rtx_cost (XEXP (x, 2), code, 2, speed));
7400 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7402 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7408 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7409 && mode == SImode && XEXP (x, 1) == const0_rtx)
7411 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7417 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7418 && mode == SImode && XEXP (x, 1) == const0_rtx)
7420 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7440 /* SCC insns. In the case where the comparison has already been
7441 performed, they cost 2 instructions. Otherwise they need
7442 an additional comparison before them. */
7443 *total = COSTS_N_INSNS (2);
7444 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7451 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7457 *total += COSTS_N_INSNS (1);
7458 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7459 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7461 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7465 subcode = GET_CODE (XEXP (x, 0));
7466 if (subcode == ASHIFT || subcode == ASHIFTRT
7467 || subcode == LSHIFTRT
7468 || subcode == ROTATE || subcode == ROTATERT)
7470 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7471 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7476 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7478 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7479 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7489 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7490 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7491 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7492 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7496 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7498 if (TARGET_HARD_FLOAT
7500 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7502 *total = COSTS_N_INSNS (1);
7505 *total = COSTS_N_INSNS (20);
7508 *total = COSTS_N_INSNS (1);
7510 *total += COSTS_N_INSNS (3);
7516 if (GET_MODE_CLASS (mode) == MODE_INT)
7518 rtx op = XEXP (x, 0);
7519 enum machine_mode opmode = GET_MODE (op);
7522 *total += COSTS_N_INSNS (1);
7524 if (opmode != SImode)
7528 /* If !arm_arch4, we use one of the extendhisi2_mem
7529 or movhi_bytes patterns for HImode. For a QImode
7530 sign extension, we first zero-extend from memory
7531 and then perform a shift sequence. */
7532 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7533 *total += COSTS_N_INSNS (2);
7536 *total += COSTS_N_INSNS (1);
7538 /* We don't have the necessary insn, so we need to perform some
7539 other operation. */
7540 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7541 /* An and with constant 255. */
7542 *total += COSTS_N_INSNS (1);
7544 /* A shift sequence. Increase costs slightly to avoid
7545 combining two shifts into an extend operation. */
7546 *total += COSTS_N_INSNS (2) + 1;
7552 switch (GET_MODE (XEXP (x, 0)))
7559 *total = COSTS_N_INSNS (1);
7569 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7573 if (const_ok_for_arm (INTVAL (x))
7574 || const_ok_for_arm (~INTVAL (x)))
7575 *total = COSTS_N_INSNS (1);
7577 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7578 INTVAL (x), NULL_RTX,
7585 *total = COSTS_N_INSNS (3);
7589 *total = COSTS_N_INSNS (1);
7593 *total = COSTS_N_INSNS (1);
7594 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7598 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7599 && (mode == SFmode || !TARGET_VFP_SINGLE))
7600 *total = COSTS_N_INSNS (1);
7602 *total = COSTS_N_INSNS (4);
7609 /* We cost this as high as our memory costs to allow this to
7610 be hoisted from loops. */
7611 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7613 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7618 *total = COSTS_N_INSNS (4);
7623 /* Estimates the size cost of thumb1 instructions.
7624 For now most of the code is copied from thumb1_rtx_costs. We need
7625 finer-grained tuning when we have more related test cases. */
7627 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7629 enum machine_mode mode = GET_MODE (x);
7642 return COSTS_N_INSNS (1);
7645 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7647 /* Thumb1 mul instruction can't operate on const. We must load it
7648 into a register first. */
7649 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7650 return COSTS_N_INSNS (1) + const_size;
7652 return COSTS_N_INSNS (1);
7655 return (COSTS_N_INSNS (1)
7656 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7657 + (GET_CODE (SET_DEST (x)) == MEM)));
7662 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7663 return COSTS_N_INSNS (1);
7664 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7665 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7666 return COSTS_N_INSNS (2);
7667 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7668 if (thumb_shiftable_const (INTVAL (x)))
7669 return COSTS_N_INSNS (2);
7670 return COSTS_N_INSNS (3);
7672 else if ((outer == PLUS || outer == COMPARE)
7673 && INTVAL (x) < 256 && INTVAL (x) > -256)
7675 else if ((outer == IOR || outer == XOR || outer == AND)
7676 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7677 return COSTS_N_INSNS (1);
7678 else if (outer == AND)
7681 /* This duplicates the tests in the andsi3 expander. */
7682 for (i = 9; i <= 31; i++)
7683 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7684 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7685 return COSTS_N_INSNS (2);
7687 else if (outer == ASHIFT || outer == ASHIFTRT
7688 || outer == LSHIFTRT)
7690 return COSTS_N_INSNS (2);
7696 return COSTS_N_INSNS (3);
7714 /* XXX another guess. */
7715 /* Memory costs quite a lot for the first word, but subsequent words
7716 load at the equivalent of a single insn each. */
7717 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7718 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7723 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7728 /* XXX still guessing. */
7729 switch (GET_MODE (XEXP (x, 0)))
7732 return (1 + (mode == DImode ? 4 : 0)
7733 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7736 return (4 + (mode == DImode ? 4 : 0)
7737 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7740 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7751 /* RTX costs when optimizing for size. */
7753 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7756 enum machine_mode mode = GET_MODE (x);
7759 *total = thumb1_size_rtx_costs (x, code, outer_code);
7763 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7767 /* A memory access costs 1 insn if the mode is small, or the address is
7768 a single register, otherwise it costs one insn per word. */
7769 if (REG_P (XEXP (x, 0)))
7770 *total = COSTS_N_INSNS (1);
7772 && GET_CODE (XEXP (x, 0)) == PLUS
7773 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7774 /* This will be split into two instructions.
7775 See arm.md:calculate_pic_address. */
7776 *total = COSTS_N_INSNS (2);
7778 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7785 /* Needs a libcall, so it costs about this. */
7786 *total = COSTS_N_INSNS (2);
7790 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7792 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7800 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7802 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7805 else if (mode == SImode)
7807 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7808 /* Slightly disparage register shifts, but not by much. */
7809 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7810 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7814 /* Needs a libcall. */
7815 *total = COSTS_N_INSNS (2);
7819 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7820 && (mode == SFmode || !TARGET_VFP_SINGLE))
7822 *total = COSTS_N_INSNS (1);
7828 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7829 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7831 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7832 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7833 || subcode1 == ROTATE || subcode1 == ROTATERT
7834 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7835 || subcode1 == ASHIFTRT)
7837 /* It's just the cost of the two operands. */
7842 *total = COSTS_N_INSNS (1);
7846 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7850 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7851 && (mode == SFmode || !TARGET_VFP_SINGLE))
7853 *total = COSTS_N_INSNS (1);
7857 /* A shift as a part of ADD costs nothing. */
7858 if (GET_CODE (XEXP (x, 0)) == MULT
7859 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7861 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7862 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7863 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7868 case AND: case XOR: case IOR:
7871 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7873 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7874 || subcode == LSHIFTRT || subcode == ASHIFTRT
7875 || (code == AND && subcode == NOT))
7877 /* It's just the cost of the two operands. */
7883 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7887 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7891 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7892 && (mode == SFmode || !TARGET_VFP_SINGLE))
7894 *total = COSTS_N_INSNS (1);
7900 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7909 if (cc_register (XEXP (x, 0), VOIDmode))
7912 *total = COSTS_N_INSNS (1);
7916 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7917 && (mode == SFmode || !TARGET_VFP_SINGLE))
7918 *total = COSTS_N_INSNS (1);
7920 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7925 return arm_rtx_costs_1 (x, outer_code, total, 0);
7928 if (const_ok_for_arm (INTVAL (x)))
7929 /* A multiplication by a constant requires another instruction
7930 to load the constant to a register. */
7931 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7933 else if (const_ok_for_arm (~INTVAL (x)))
7934 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7935 else if (const_ok_for_arm (-INTVAL (x)))
7937 if (outer_code == COMPARE || outer_code == PLUS
7938 || outer_code == MINUS)
7941 *total = COSTS_N_INSNS (1);
7944 *total = COSTS_N_INSNS (2);
7950 *total = COSTS_N_INSNS (2);
7954 *total = COSTS_N_INSNS (4);
7959 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7960 cost of these slightly. */
7961 *total = COSTS_N_INSNS (1) + 1;
7968 if (mode != VOIDmode)
7969 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7971 *total = COSTS_N_INSNS (4); /* Who knows? */
7976 /* Dispatch the RTX costs: size costs when optimizing for size,
7977 otherwise the tuning-specific cost function. */
7978 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
7979 int *total, bool speed)
7982 return arm_size_rtx_costs (x, (enum rtx_code) code,
7983 (enum rtx_code) outer_code, total);
7985 return current_tune->rtx_costs (x, (enum rtx_code) code,
7986 (enum rtx_code) outer_code,
7990 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7991 supported on any "slowmul" cores, so it can be ignored. */
7994 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7995 int *total, bool speed)
7997 enum machine_mode mode = GET_MODE (x);
8001 *total = thumb1_rtx_costs (x, code, outer_code);
8008 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8011 *total = COSTS_N_INSNS (20);
8015 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8017 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8018 & (unsigned HOST_WIDE_INT) 0xffffffff);
8019 int cost, const_ok = const_ok_for_arm (i);
8020 int j, booth_unit_size;
8022 /* Tune as appropriate. */
8023 cost = const_ok ? 4 : 8;
8024 booth_unit_size = 2;
8025 for (j = 0; i && j < 32; j += booth_unit_size)
8027 i >>= booth_unit_size;
8031 *total = COSTS_N_INSNS (cost);
8032 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8036 *total = COSTS_N_INSNS (20);
8040 return arm_rtx_costs_1 (x, outer_code, total, speed);
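/* A standalone illustration of the Booth-step count used above (plain
   C sketch, not GCC code): each iteration retires booth_unit_size bits
   of the multiplier, so the extra cost grows with the position of the
   multiplier's highest set bit. */
static int
booth_steps_sketch (unsigned long i, int booth_unit_size)
{
  int j, steps = 0;

  for (j = 0; i && j < 32; j += booth_unit_size)
    {
      i >>= booth_unit_size;
      steps++;
    }
  return steps;
}
/* e.g. booth_steps_sketch (0xff, 2) == 4 for the 2-bit units assumed
   here, while booth_steps_sketch (0xff, 8) == 1. */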
8045 /* RTX cost for cores with a fast multiply unit (M variants). */
8048 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8049 int *total, bool speed)
8051 enum machine_mode mode = GET_MODE (x);
8055 *total = thumb1_rtx_costs (x, code, outer_code);
8059 /* ??? Should Thumb-2 use different costs? */
8063 /* There is no point basing this on the tuning, since it is always the
8064 fast variant if it exists at all. */
8066 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8067 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8068 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8070 *total = COSTS_N_INSNS (2);
8077 *total = COSTS_N_INSNS (5);
8081 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8083 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8084 & (unsigned HOST_WIDE_INT) 0xffffffff);
8085 int cost, const_ok = const_ok_for_arm (i);
8086 int j, booth_unit_size;
8088 /* Tune as appropriate. */
8089 cost = const_ok ? 4 : 8;
8090 booth_unit_size = 8;
8091 for (j = 0; i && j < 32; j += booth_unit_size)
8093 i >>= booth_unit_size;
8097 *total = COSTS_N_INSNS (cost);
8103 *total = COSTS_N_INSNS (4);
8107 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8109 if (TARGET_HARD_FLOAT
8111 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8113 *total = COSTS_N_INSNS (1);
8118 /* Requires a libcall. */
8119 *total = COSTS_N_INSNS (20);
8123 return arm_rtx_costs_1 (x, outer_code, total, speed);
8128 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8129 so it can be ignored. */
8132 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8133 int *total, bool speed)
8135 enum machine_mode mode = GET_MODE (x);
8139 *total = thumb1_rtx_costs (x, code, outer_code);
8146 if (GET_CODE (XEXP (x, 0)) != MULT)
8147 return arm_rtx_costs_1 (x, outer_code, total, speed);
8149 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8150 will stall until the multiplication is complete. */
8151 *total = COSTS_N_INSNS (3);
8155 /* There is no point basing this on the tuning, since it is always the
8156 fast variant if it exists at all. */
8158 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8159 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8160 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8162 *total = COSTS_N_INSNS (2);
8169 *total = COSTS_N_INSNS (5);
8173 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8175 /* If operand 1 is a constant we can more accurately
8176 calculate the cost of the multiply. The multiplier can
8177 retire 15 bits on the first cycle and a further 12 on the
8178 second. We do, of course, have to load the constant into
8179 a register first. */
8180 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8181 /* There's a general overhead of one cycle. */
8183 unsigned HOST_WIDE_INT masked_const;
8188 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8190 masked_const = i & 0xffff8000;
8191 if (masked_const != 0)
8194 masked_const = i & 0xf8000000;
8195 if (masked_const != 0)
8198 *total = COSTS_N_INSNS (cost);
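/* e.g. a multiplier of 0x4000 fits entirely in the first 15 bits and
   keeps the base one-cycle cost; 0x123456 has bits above bit 14 and
   costs one cycle more; 0x12345678 also has bits in 31:27 and costs a
   third cycle. */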
8204 *total = COSTS_N_INSNS (3);
8208 /* Requires a libcall. */
8209 *total = COSTS_N_INSNS (20);
8213 return arm_rtx_costs_1 (x, outer_code, total, speed);
8218 /* RTX costs for 9e (and later) cores. */
8221 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8222 int *total, bool speed)
8224 enum machine_mode mode = GET_MODE (x);
8231 *total = COSTS_N_INSNS (3);
8235 *total = thumb1_rtx_costs (x, code, outer_code);
8243 /* There is no point basing this on the tuning, since it is always the
8244 fast variant if it exists at all. */
8246 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8247 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8248 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8250 *total = COSTS_N_INSNS (2);
8257 *total = COSTS_N_INSNS (5);
8263 *total = COSTS_N_INSNS (2);
8267 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8269 if (TARGET_HARD_FLOAT
8271 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8273 *total = COSTS_N_INSNS (1);
8278 *total = COSTS_N_INSNS (20);
8282 return arm_rtx_costs_1 (x, outer_code, total, speed);
8285 /* All address computations that can be done are free, but rtx cost returns
8286 the same for practically all of them. So we weight the different types
8287 of address here in the order (most preferred first):
8288 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8290 arm_arm_address_cost (rtx x)
8292 enum rtx_code c = GET_CODE (x);
8294 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8296 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8301 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8304 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8314 arm_thumb_address_cost (rtx x)
8316 enum rtx_code c = GET_CODE (x);
8321 && GET_CODE (XEXP (x, 0)) == REG
8322 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8329 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8331 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8334 /* Adjust cost hook for XScale. */
8336 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8338 /* Some true dependencies can have a higher cost depending
8339 on precisely how certain input operands are used. */
8340 if (REG_NOTE_KIND(link) == 0
8341 && recog_memoized (insn) >= 0
8342 && recog_memoized (dep) >= 0)
8344 int shift_opnum = get_attr_shift (insn);
8345 enum attr_type attr_type = get_attr_type (dep);
8347 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8348 operand for INSN. If we have a shifted input operand and the
8349 instruction we depend on is another ALU instruction, then we may
8350 have to account for an additional stall. */
8351 if (shift_opnum != 0
8352 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8354 rtx shifted_operand;
8357 /* Get the shifted operand. */
8358 extract_insn (insn);
8359 shifted_operand = recog_data.operand[shift_opnum];
8361 /* Iterate over all the operands in DEP. If we write an operand
8362 that overlaps with SHIFTED_OPERAND, then we have to increase
8363 the cost of this dependency. */
8365 preprocess_constraints ();
8366 for (opno = 0; opno < recog_data.n_operands; opno++)
8368 /* We can ignore strict inputs. */
8369 if (recog_data.operand_type[opno] == OP_IN)
8372 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8384 /* Adjust cost hook for Cortex A9. */
8386 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8388 switch (REG_NOTE_KIND (link))
8395 case REG_DEP_OUTPUT:
8396 if (recog_memoized (insn) >= 0
8397 && recog_memoized (dep) >= 0)
8399 if (GET_CODE (PATTERN (insn)) == SET)
8402 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8404 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8406 enum attr_type attr_type_insn = get_attr_type (insn);
8407 enum attr_type attr_type_dep = get_attr_type (dep);
8409 /* By default all dependencies of the form
8410 s0 = s0 <op> s1
8411 s0 = s0 <op> s2
8412 have an extra latency of 1 cycle because
8413 of the input and output dependency in this
8414 case. However this gets modeled as a true
8415 dependency and hence all these checks. */
8416 if (REG_P (SET_DEST (PATTERN (insn)))
8417 && REG_P (SET_DEST (PATTERN (dep)))
8418 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8419 SET_DEST (PATTERN (dep))))
8421 /* FMACS is a special case where the dependent
8422 instruction can be issued 3 cycles before
8423 the normal latency in case of an output
8424 dependency. */
8425 if ((attr_type_insn == TYPE_FMACS
8426 || attr_type_insn == TYPE_FMACD)
8427 && (attr_type_dep == TYPE_FMACS
8428 || attr_type_dep == TYPE_FMACD))
8430 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8431 *cost = insn_default_latency (dep) - 3;
8433 *cost = insn_default_latency (dep);
8438 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8439 *cost = insn_default_latency (dep) + 1;
8441 *cost = insn_default_latency (dep);
8457 /* Adjust cost hook for FA726TE. */
8459 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8461 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8462 followed by a predicated one) carries a penalty of 3 cycles. */
8463 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8464 && recog_memoized (insn) >= 0
8465 && recog_memoized (dep) >= 0
8466 && get_attr_conds (dep) == CONDS_SET)
8468 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8469 if (get_attr_conds (insn) == CONDS_USE
8470 && get_attr_type (insn) != TYPE_BRANCH)
8476 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8477 || get_attr_conds (insn) == CONDS_USE)
8487 /* Implement TARGET_REGISTER_MOVE_COST.
8489 Moves between VFP_REGS and GENERAL_REGS are a single insn, but one
8490 is typically more expensive than a single memory access. We set
8491 the cost to less than two memory accesses so that floating
8492 point to integer conversion does not go through memory. */
8495 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8496 reg_class_t from, reg_class_t to)
8500 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8501 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8503 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8504 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8506 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8513 if (from == HI_REGS || to == HI_REGS)
8520 /* Implement TARGET_MEMORY_MOVE_COST. */
8523 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8524 bool in ATTRIBUTE_UNUSED)
8530 if (GET_MODE_SIZE (mode) < 4)
8533 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
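/* e.g. on the non-32BIT (Thumb-1) path above, an SImode access through
   LO_REGS costs 2 * 4 * 1 = 8, while a DImode access through any other
   class costs 2 * 8 * 2 = 32. */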
8537 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8538 It corrects the value of COST based on the relationship between
8539 INSN and DEP through the dependence LINK. It returns the new
8540 value. There is a per-core adjust_cost hook to adjust scheduler costs
8541 and the per-core hook can choose to completely override the generic
8542 adjust_cost function. Only put bits of code into arm_adjust_cost that
8543 are common across all cores. */
8545 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8549 /* When generating Thumb-1 code, we want to place flag-setting operations
8550 close to a conditional branch which depends on them, so that we can
8551 omit the comparison. */
8553 && REG_NOTE_KIND (link) == 0
8554 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8555 && recog_memoized (dep) >= 0
8556 && get_attr_conds (dep) == CONDS_SET)
8559 if (current_tune->sched_adjust_cost != NULL)
8561 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8565 /* XXX Is this strictly true? */
8566 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8567 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8570 /* Call insns don't incur a stall, even if they follow a load. */
8571 if (REG_NOTE_KIND (link) == 0
8572 && GET_CODE (insn) == CALL_INSN)
8575 if ((i_pat = single_set (insn)) != NULL
8576 && GET_CODE (SET_SRC (i_pat)) == MEM
8577 && (d_pat = single_set (dep)) != NULL
8578 && GET_CODE (SET_DEST (d_pat)) == MEM)
8580 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8581 /* This is a load after a store; there is no conflict if the load reads
8582 from a cached area. Assume that loads from the stack and from the
8583 constant pool are cached, and that others will miss. This is a
8584 hack. */
8586 if ((GET_CODE (src_mem) == SYMBOL_REF
8587 && CONSTANT_POOL_ADDRESS_P (src_mem))
8588 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8589 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8590 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8598 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8601 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8603 return (optimize > 0) ? 2 : 0;
8607 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8609 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8612 static bool fp_consts_inited = false;
8614 static REAL_VALUE_TYPE value_fp0;
8617 init_fp_table (void)
8621 r = REAL_VALUE_ATOF ("0", DFmode);
8623 fp_consts_inited = true;
8626 /* Return TRUE if rtx X is a valid immediate FP constant. */
8628 arm_const_double_rtx (rtx x)
8632 if (!fp_consts_inited)
8635 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8636 if (REAL_VALUE_MINUS_ZERO (r))
8639 if (REAL_VALUES_EQUAL (r, value_fp0))
8645 /* VFPv3 has a fairly wide range of representable immediates, formed from
8646 "quarter-precision" floating-point values. These can be evaluated using this
8647 formula (with ^ for exponentiation):
8649 -1^s * n * 2^-r
8651 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8652 16 <= n <= 31 and 0 <= r <= 7.
8654 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8656 - A (most-significant) is the sign bit.
8657 - BCD are the exponent (encoded as r XOR 3).
8658 - EFGH are the mantissa (encoded as n - 16).
8661 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8662 fconst[sd] instruction, or -1 if X isn't suitable. */
8664 vfp3_const_double_index (rtx x)
8666 REAL_VALUE_TYPE r, m;
8668 unsigned HOST_WIDE_INT mantissa, mant_hi;
8669 unsigned HOST_WIDE_INT mask;
8670 HOST_WIDE_INT m1, m2;
8671 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8673 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8676 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8678 /* We can't represent these things, so detect them first. */
8679 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8682 /* Extract sign, exponent and mantissa. */
8683 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8684 r = real_value_abs (&r);
8685 exponent = REAL_EXP (&r);
8686 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8687 highest (sign) bit, with a fixed binary point at bit point_pos.
8688 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8689 bits for the mantissa, this may fail (low bits would be lost). */
8690 real_ldexp (&m, &r, point_pos - exponent);
8691 REAL_VALUE_TO_INT (&m1, &m2, m);
8695 /* If there are bits set in the low part of the mantissa, we can't
8696 represent this value. */
8700 /* Now make it so that mantissa contains the most-significant bits, and move
8701 the point_pos to indicate that the least-significant bits have been
8702 discarded. */
8703 point_pos -= HOST_BITS_PER_WIDE_INT;
8706 /* We can permit four significant bits of mantissa only, plus a high bit
8707 which is always 1. */
8708 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8709 if ((mantissa & mask) != 0)
8712 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8713 mantissa >>= point_pos - 5;
8715 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8716 floating-point immediate zero with Neon using an integer-zero load, but
8717 that case is handled elsewhere.) */
8721 gcc_assert (mantissa >= 16 && mantissa <= 31);
8723 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8724 normalized significands are in the range [1, 2)). (Our mantissa is shifted
8725 left 4 places at this point relative to normalized IEEE754 values). GCC
8726 internally uses [0.5, 1) (see real.c), so the exponent returned from
8727 REAL_EXP must be altered. */
8728 exponent = 5 - exponent;
8730 if (exponent < 0 || exponent > 7)
8733 /* Sign, mantissa and exponent are now in the correct form to plug into the
8734 formula described in the comment above. */
8735 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
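/* A standalone sketch of the inverse mapping (plain C, hypothetical
   helper): pack a sign bit, exponent r and mantissa n straight into
   the 8-bit ABCDEFGH form described above. */
static int
vfp3_encode_immediate_sketch (int sign, int r, int n)
{
  /* Assumes 0 <= r <= 7 and 16 <= n <= 31, as validated above. */
  return (sign << 7) | ((r ^ 3) << 4) | (n - 16);
}
/* e.g. 0.5 = 16 * 2^-5 gives sign = 0, n = 16, r = 5 and hence index
   0x60, while 31.0 = 31 * 2^-0 encodes as 0x3f. */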
8738 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8740 vfp3_const_double_rtx (rtx x)
8745 return vfp3_const_double_index (x) != -1;
8748 /* Recognize immediates which can be used in various Neon instructions. Legal
8749 immediates are described by the following table (for VMVN variants, the
8750 bitwise inverse of the constant shown is recognized. In either case, VMOV
8751 is output and the correct instruction to use for a given constant is chosen
8752 by the assembler). The constant shown is replicated across all elements of
8753 the destination vector.
8755 insn elems variant constant (binary)
8756 ---- ----- ------- -----------------
8757 vmov i32 0 00000000 00000000 00000000 abcdefgh
8758 vmov i32 1 00000000 00000000 abcdefgh 00000000
8759 vmov i32 2 00000000 abcdefgh 00000000 00000000
8760 vmov i32 3 abcdefgh 00000000 00000000 00000000
8761 vmov i16 4 00000000 abcdefgh
8762 vmov i16 5 abcdefgh 00000000
8763 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8764 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8765 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8766 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8767 vmvn i16 10 00000000 abcdefgh
8768 vmvn i16 11 abcdefgh 00000000
8769 vmov i32 12 00000000 00000000 abcdefgh 11111111
8770 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8771 vmov i32 14 00000000 abcdefgh 11111111 11111111
8772 vmvn i32 15 00000000 abcdefgh 11111111 11111111
vmov i8 16 abcdefgh
8774 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8775 eeeeeeee ffffffff gggggggg hhhhhhhh
8776 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8778 For case 18, B = !b. Representable values are exactly those accepted by
8779 vfp3_const_double_index, but are output as floating-point numbers rather than indices.
8782 Variants 0-5 (inclusive) may also be used as immediates for the second
8783 operand of VORR/VBIC instructions.
8785 The INVERSE argument causes the bitwise inverse of the given operand to be
8786 recognized instead (used for recognizing legal immediates for the VAND/VORN
8787 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8788 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8789 output, rather than the real insns vbic/vorr).
8791 INVERSE makes no difference to the recognition of float vectors.
8793 The return value is the variant of immediate as shown in the above table, or
8794 -1 if the given value doesn't match any of the listed patterns.
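/* Added example: a V4SI constant with every element equal to 0x000000ab
   matches variant 0 above (abcdefgh = 0xab), while every element equal
   to 0xffffffab matches variant 6, the VMVN form of the same pattern.  */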
8797 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8798 rtx *modconst, int *elementwidth)
8800 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
matches = 1; \
8802 for (i = 0; i < idx; i += (STRIDE)) \
if (!(TEST)) matches = 0; \
if (matches) { \
8807 immtype = (CLASS); \
8808 elsize = (ELSIZE); \
break; }
8812 unsigned int i, elsize = 0, idx = 0, n_elts;
8813 unsigned int innersize;
8814 unsigned char bytes[16];
8815 int immtype = -1, matches;
8816 unsigned int invmask = inverse ? 0xff : 0;
8817 bool vector = GET_CODE (op) == CONST_VECTOR;
8821 n_elts = CONST_VECTOR_NUNITS (op);
8822 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8827 if (mode == VOIDmode)
8829 innersize = GET_MODE_SIZE (mode);
8832 /* Vectors of float constants. */
8833 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8835 rtx el0 = CONST_VECTOR_ELT (op, 0);
8838 if (!vfp3_const_double_rtx (el0))
8841 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8843 for (i = 1; i < n_elts; i++)
8845 rtx elt = CONST_VECTOR_ELT (op, i);
8848 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8850 if (!REAL_VALUES_EQUAL (r0, re))
8855 *modconst = CONST_VECTOR_ELT (op, 0);
8863 /* Splat vector constant out into a byte vector. */
8864 for (i = 0; i < n_elts; i++)
8866 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
8867 unsigned HOST_WIDE_INT elpart;
8868 unsigned int part, parts;
8870 if (GET_CODE (el) == CONST_INT)
8872 elpart = INTVAL (el);
8875 else if (GET_CODE (el) == CONST_DOUBLE)
8877 elpart = CONST_DOUBLE_LOW (el);
8883 for (part = 0; part < parts; part++)
8886 for (byte = 0; byte < innersize; byte++)
8888 bytes[idx++] = (elpart & 0xff) ^ invmask;
8889 elpart >>= BITS_PER_UNIT;
8891 if (GET_CODE (el) == CONST_DOUBLE)
8892 elpart = CONST_DOUBLE_HIGH (el);
8897 gcc_assert (idx == GET_MODE_SIZE (mode));
8901 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8902 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8904 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8905 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8907 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8908 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8910 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8911 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8913 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8915 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8917 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8918 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8920 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8921 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8923 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8924 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8926 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8927 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8929 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8931 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8933 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8934 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8936 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8937 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8939 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8940 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8942 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8943 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8945 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8947 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8948 && bytes[i] == bytes[(i + 8) % idx]);
8956 *elementwidth = elsize;
8960 unsigned HOST_WIDE_INT imm = 0;
8962 /* Un-invert bytes of recognized vector, if necessary. */
8964 for (i = 0; i < idx; i++)
8965 bytes[i] ^= invmask;
8969 /* FIXME: Broken on 32-bit H_W_I hosts. */
8970 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8972 for (i = 0; i < 8; i++)
8973 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8974 << (i * BITS_PER_UNIT);
8976 *modconst = GEN_INT (imm);
8980 unsigned HOST_WIDE_INT imm = 0;
8982 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8983 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8985 *modconst = GEN_INT (imm);
8993 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8994 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8995 float elements), and a modified constant (whatever should be output for a
8996 VMOV) in *MODCONST. */
8999 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9000 rtx *modconst, int *elementwidth)
9004 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9010 *modconst = tmpconst;
9013 *elementwidth = tmpwidth;
9018 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9019 the immediate is valid, write a constant suitable for using as an operand
9020 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9021 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9024 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9025 rtx *modconst, int *elementwidth)
9029 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9031 if (retval < 0 || retval > 5)
9035 *modconst = tmpconst;
9038 *elementwidth = tmpwidth;
9043 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9044 the immediate is valid, write a constant suitable for using as an operand
9045 to VSHR/VSHL to *MODCONST and the corresponding element width to
9046 *ELEMENTWIDTH. ISLEFTSHIFT distinguishes a left shift from a right shift,
9047 because the two have different immediate ranges. */
9050 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9051 rtx *modconst, int *elementwidth,
9054 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9055 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9056 unsigned HOST_WIDE_INT last_elt = 0;
9057 unsigned HOST_WIDE_INT maxshift;
9059 /* Extract the constant elements and check that they are all equal, recording the common value in last_elt. */
9060 for (i = 0; i < n_elts; i++)
9062 rtx el = CONST_VECTOR_ELT (op, i);
9063 unsigned HOST_WIDE_INT elpart;
9065 if (GET_CODE (el) == CONST_INT)
9066 elpart = INTVAL (el);
9067 else if (GET_CODE (el) == CONST_DOUBLE)
9072 if (i != 0 && elpart != last_elt)
9078 /* Shift less than element size. */
9079 maxshift = innersize * 8;
9083 /* Left shift immediate value can be from 0 to <size>-1. */
9084 if (last_elt >= maxshift)
9089 /* Right shift immediate value can be from 1 to <size>. */
9090 if (last_elt == 0 || last_elt > maxshift)
9095 *elementwidth = innersize * 8;
9098 *modconst = CONST_VECTOR_ELT (op, 0);
9103 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
9107 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9108 int inverse, int quad)
9110 int width, is_valid;
9111 static char templ[40];
9113 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9115 gcc_assert (is_valid != 0);
9118 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9120 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
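/* Added usage note: for a quad register with 32-bit elements and MNEM
   "vbic", the template produced above is "vbic.i32\t%q0, %2"; the
   D-register form uses %P0 instead of %q0.  */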
9125 /* Return a string suitable for output of Neon immediate shift operation
9126 (VSHR or VSHL) MNEM. */
9129 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9130 enum machine_mode mode, int quad,
9133 int width, is_valid;
9134 static char templ[40];
9136 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9137 gcc_assert (is_valid != 0);
9140 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9142 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9147 /* Output a sequence of pairwise operations to implement a reduction.
9148 NOTE: We do "too much work" here, because pairwise operations work on two
9149 registers-worth of operands in one go. Unfortunately we can't exploit those
9150 extra calculations to do the full operation in fewer steps, as far as we can tell.
9151 Although all vector elements of the result but the first are ignored, we
9152 actually calculate the same result in each of the elements. An alternative
9153 such as initially loading a vector with zero to use as each of the second
9154 operands would use up an additional register and take an extra instruction,
9155 for no particular gain. */
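/* Added example of the reduction shape: conceptually, for a four-element
   vector {a, b, c, d} the first pairwise step produces {a+b, c+d, ...}
   and the second produces a+b+c+d, so the loop below needs log2(PARTS)
   steps (i = 2, then i = 1 for a four-element vector).  */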
9158 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9159 rtx (*reduc) (rtx, rtx, rtx))
9161 enum machine_mode inner = GET_MODE_INNER (mode);
9162 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9165 for (i = parts / 2; i >= 1; i /= 2)
9167 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9168 emit_insn (reduc (dest, tmpsum, tmpsum));
9173 /* If VALS is a vector constant that can be loaded into a register
9174 using VDUP, generate instructions to do so and return an RTX to
9175 assign to the register. Otherwise return NULL_RTX. */
9178 neon_vdup_constant (rtx vals)
9180 enum machine_mode mode = GET_MODE (vals);
9181 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9182 int n_elts = GET_MODE_NUNITS (mode);
9183 bool all_same = true;
9187 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9190 for (i = 0; i < n_elts; ++i)
9192 x = XVECEXP (vals, 0, i);
9193 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9198 /* The elements are not all the same. We could handle repeating
9199 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9200 {0, C, 0, C, 0, C, 0, C} which can be loaded using vdup.i16). */
9204 /* We can load this constant by using VDUP and a constant in a
9205 single ARM register. This will be cheaper than a vector load. */
9208 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9209 return gen_rtx_VEC_DUPLICATE (mode, x);
9212 /* Generate code to load VALS, which is a PARALLEL containing only
9213 constants (for vec_init) or CONST_VECTOR, efficiently into a
9214 register. Returns an RTX to copy into the register, or NULL_RTX
9215 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9218 neon_make_constant (rtx vals)
9220 enum machine_mode mode = GET_MODE (vals);
9222 rtx const_vec = NULL_RTX;
9223 int n_elts = GET_MODE_NUNITS (mode);
9227 if (GET_CODE (vals) == CONST_VECTOR)
9229 else if (GET_CODE (vals) == PARALLEL)
9231 /* A CONST_VECTOR must contain only CONST_INTs and
9232 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9233 Only store valid constants in a CONST_VECTOR. */
9234 for (i = 0; i < n_elts; ++i)
9236 rtx x = XVECEXP (vals, 0, i);
9237 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9240 if (n_const == n_elts)
9241 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9246 if (const_vec != NULL
9247 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9248 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9250 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9251 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9252 pipeline cycle; creating the constant takes one or two ARM pipeline cycles. */
9255 else if (const_vec != NULL_RTX)
9256 /* Load from constant pool. On Cortex-A8 this takes two cycles
9257 (for either double or quad vectors). We can not take advantage
9258 of single-cycle VLD1 because we need a PC-relative addressing mode. */
9262 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9263 We can not construct an initializer. */
9267 /* Initialize vector TARGET to VALS. */
9270 neon_expand_vector_init (rtx target, rtx vals)
9272 enum machine_mode mode = GET_MODE (target);
9273 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9274 int n_elts = GET_MODE_NUNITS (mode);
9275 int n_var = 0, one_var = -1;
9276 bool all_same = true;
9280 for (i = 0; i < n_elts; ++i)
9282 x = XVECEXP (vals, 0, i);
9283 if (!CONSTANT_P (x))
9284 ++n_var, one_var = i;
9286 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9292 rtx constant = neon_make_constant (vals);
9293 if (constant != NULL_RTX)
9295 emit_move_insn (target, constant);
9300 /* Splat a single non-constant element if we can. */
9301 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9303 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9304 emit_insn (gen_rtx_SET (VOIDmode, target,
9305 gen_rtx_VEC_DUPLICATE (mode, x)));
9309 /* One field is non-constant. Load constant then overwrite varying
9310 field. This is more efficient than using the stack. */
9313 rtx copy = copy_rtx (vals);
9314 rtx index = GEN_INT (one_var);
9316 /* Load constant part of vector, substitute neighboring value for varying one. */
9318 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9319 neon_expand_vector_init (target, copy);
9321 /* Insert variable. */
9322 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9326 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9329 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9332 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9335 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9338 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9341 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9344 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9347 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9350 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9358 /* Construct the vector in memory one field at a time
9359 and load the whole vector. */
9360 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9361 for (i = 0; i < n_elts; i++)
9362 emit_move_insn (adjust_address_nv (mem, inner_mode,
9363 i * GET_MODE_SIZE (inner_mode)),
9364 XVECEXP (vals, 0, i));
9365 emit_move_insn (target, mem);
9368 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9369 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9370 reported source locations are bogus. */
9373 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9378 gcc_assert (GET_CODE (operand) == CONST_INT);
9380 lane = INTVAL (operand);
9382 if (lane < low || lane >= high)
9386 /* Bounds-check lanes. */
9389 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9391 bounds_check (operand, low, high, "lane out of range");
9394 /* Bounds-check constants. */
9397 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9399 bounds_check (operand, low, high, "constant out of range");
9403 neon_element_bits (enum machine_mode mode)
if (mode == DImode)
9406 return GET_MODE_BITSIZE (mode);
else
9408 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9412 /* Predicates for `match_operand' and `match_operator'. */
9414 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9415 WB is true if full writeback address modes are allowed and is false
9416 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
9420 arm_coproc_mem_operand (rtx op, bool wb)
9424 /* Reject eliminable registers. */
9425 if (! (reload_in_progress || reload_completed)
9426 && ( reg_mentioned_p (frame_pointer_rtx, op)
9427 || reg_mentioned_p (arg_pointer_rtx, op)
9428 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9429 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9430 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9431 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9434 /* Constants are converted into offsets from labels. */
9435 if (GET_CODE (op) != MEM)
9440 if (reload_completed
9441 && (GET_CODE (ind) == LABEL_REF
9442 || (GET_CODE (ind) == CONST
9443 && GET_CODE (XEXP (ind, 0)) == PLUS
9444 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9445 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9448 /* Match: (mem (reg)). */
9449 if (GET_CODE (ind) == REG)
9450 return arm_address_register_rtx_p (ind, 0);
9452 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9453 acceptable in any case (subject to verification by
9454 arm_address_register_rtx_p). We need WB to be true to accept
9455 PRE_INC and POST_DEC. */
9456 if (GET_CODE (ind) == POST_INC
9457 || GET_CODE (ind) == PRE_DEC
9459 && (GET_CODE (ind) == PRE_INC
9460 || GET_CODE (ind) == POST_DEC)))
9461 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9464 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9465 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9466 && GET_CODE (XEXP (ind, 1)) == PLUS
9467 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9468 ind = XEXP (ind, 1);
9473 if (GET_CODE (ind) == PLUS
9474 && GET_CODE (XEXP (ind, 0)) == REG
9475 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9476 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9477 && INTVAL (XEXP (ind, 1)) > -1024
9478 && INTVAL (XEXP (ind, 1)) < 1024
9479 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9485 /* Return TRUE if OP is a memory operand which we can load or store a vector
9486 to/from. TYPE is one of the following values:
9487 0 - Vector load/store (vldr)
9488 1 - Core registers (ldm)
9489 2 - Element/structure loads (vld1)
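/* Added summary of the checks below: every TYPE accepts a plain
   (mem (reg)); TYPEs 0 and 2 also accept post-increment (TYPE 0
   pre-decrement as well); and the vldr-style TYPE 0 accepts a register
   plus a word-aligned constant offset in the range [-1020, 1012].  */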
9492 neon_vector_mem_operand (rtx op, int type)
9496 /* Reject eliminable registers. */
9497 if (! (reload_in_progress || reload_completed)
9498 && ( reg_mentioned_p (frame_pointer_rtx, op)
9499 || reg_mentioned_p (arg_pointer_rtx, op)
9500 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9501 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9502 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9503 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9506 /* Constants are converted into offsets from labels. */
9507 if (GET_CODE (op) != MEM)
9512 if (reload_completed
9513 && (GET_CODE (ind) == LABEL_REF
9514 || (GET_CODE (ind) == CONST
9515 && GET_CODE (XEXP (ind, 0)) == PLUS
9516 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9517 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9520 /* Match: (mem (reg)). */
9521 if (GET_CODE (ind) == REG)
9522 return arm_address_register_rtx_p (ind, 0);
9524 /* Allow post-increment with Neon registers. */
9525 if ((type != 1 && GET_CODE (ind) == POST_INC)
9526 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9527 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9529 /* FIXME: vld1 allows register post-modify. */
9535 && GET_CODE (ind) == PLUS
9536 && GET_CODE (XEXP (ind, 0)) == REG
9537 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9538 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9539 && INTVAL (XEXP (ind, 1)) > -1024
9540 && INTVAL (XEXP (ind, 1)) < 1016
9541 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9547 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
9550 neon_struct_mem_operand (rtx op)
9554 /* Reject eliminable registers. */
9555 if (! (reload_in_progress || reload_completed)
9556 && ( reg_mentioned_p (frame_pointer_rtx, op)
9557 || reg_mentioned_p (arg_pointer_rtx, op)
9558 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9559 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9560 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9561 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9564 /* Constants are converted into offsets from labels. */
9565 if (GET_CODE (op) != MEM)
9570 if (reload_completed
9571 && (GET_CODE (ind) == LABEL_REF
9572 || (GET_CODE (ind) == CONST
9573 && GET_CODE (XEXP (ind, 0)) == PLUS
9574 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9575 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9578 /* Match: (mem (reg)). */
9579 if (GET_CODE (ind) == REG)
9580 return arm_address_register_rtx_p (ind, 0);
9582 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9583 if (GET_CODE (ind) == POST_INC
9584 || GET_CODE (ind) == PRE_DEC)
9585 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9590 /* Return true if X is a register that will be eliminated later on. */
9592 arm_eliminable_register (rtx x)
9594 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9595 || REGNO (x) == ARG_POINTER_REGNUM
9596 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9597 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9600 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9601 coprocessor registers. Otherwise return NO_REGS. */
9604 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9608 if (!TARGET_NEON_FP16)
9609 return GENERAL_REGS;
9610 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9612 return GENERAL_REGS;
9615 /* The neon move patterns handle all legitimate vector and struct addresses. */
9618 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9619 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9620 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9621 || VALID_NEON_STRUCT_MODE (mode)))
9624 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9627 return GENERAL_REGS;
9630 /* Values which must be returned in the most-significant end of the return register. */
9634 arm_return_in_msb (const_tree valtype)
9636 return (TARGET_AAPCS_BASED
9638 && (AGGREGATE_TYPE_P (valtype)
9639 || TREE_CODE (valtype) == COMPLEX_TYPE
9640 || FIXED_POINT_TYPE_P (valtype)));
9643 /* Return TRUE if X references a SYMBOL_REF. */
9645 symbol_mentioned_p (rtx x)
9650 if (GET_CODE (x) == SYMBOL_REF)
9653 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9654 are constant offsets, not symbols. */
9655 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9658 fmt = GET_RTX_FORMAT (GET_CODE (x));
9660 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9666 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9667 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9670 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9677 /* Return TRUE if X references a LABEL_REF. */
9679 label_mentioned_p (rtx x)
9684 if (GET_CODE (x) == LABEL_REF)
9687 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9688 instruction, but they are constant offsets, not symbols. */
9689 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9692 fmt = GET_RTX_FORMAT (GET_CODE (x));
9693 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9699 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9700 if (label_mentioned_p (XVECEXP (x, i, j)))
9703 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9711 tls_mentioned_p (rtx x)
9713 switch (GET_CODE (x))
9716 return tls_mentioned_p (XEXP (x, 0));
9719 if (XINT (x, 1) == UNSPEC_TLS)
9727 /* Must not copy any rtx that uses a pc-relative address. */
9730 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9732 if (GET_CODE (*x) == UNSPEC
9733 && (XINT (*x, 1) == UNSPEC_PIC_BASE
9734 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
9740 arm_cannot_copy_insn_p (rtx insn)
9742 /* The tls call insn cannot be copied, as it is paired with a data word. */
9744 if (recog_memoized (insn) == CODE_FOR_tlscall)
9747 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9753 enum rtx_code code = GET_CODE (x);
9770 /* Match pair of min/max operators that can be implemented via usat/ssat. */
9773 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
9774 int *mask, bool *signed_sat)
9776 /* The high bound must be a power of two minus one. */
9777 int log = exact_log2 (INTVAL (hi_bound) + 1);
9781 /* The low bound is either zero (for usat) or one less than the
9782 negation of the high bound (for ssat). */
9783 if (INTVAL (lo_bound) == 0)
9788 *signed_sat = false;
9793 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
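/* Added example: a min/max pair clamping to [0, 255] matches the usat
   form (high bound 2^8 - 1, low bound 0), while a clamp to [-128, 127]
   matches the ssat form (low bound == -high bound - 1).  */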
9806 /* Return 1 if memory locations are adjacent. */
9808 adjacent_mem_locations (rtx a, rtx b)
9810 /* We don't guarantee to preserve the order of these memory refs. */
9811 if (volatile_refs_p (a) || volatile_refs_p (b))
9814 if ((GET_CODE (XEXP (a, 0)) == REG
9815 || (GET_CODE (XEXP (a, 0)) == PLUS
9816 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9817 && (GET_CODE (XEXP (b, 0)) == REG
9818 || (GET_CODE (XEXP (b, 0)) == PLUS
9819 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9821 HOST_WIDE_INT val0 = 0, val1 = 0;
9825 if (GET_CODE (XEXP (a, 0)) == PLUS)
9827 reg0 = XEXP (XEXP (a, 0), 0);
9828 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9833 if (GET_CODE (XEXP (b, 0)) == PLUS)
9835 reg1 = XEXP (XEXP (b, 0), 0);
9836 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9841 /* Don't accept any offset that will require multiple
9842 instructions to handle, since this would cause the
9843 arith_adjacentmem pattern to output an overlong sequence. */
9844 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9847 /* Don't allow an eliminable register: register elimination can make
9848 the offset too large. */
9849 if (arm_eliminable_register (reg0))
9852 val_diff = val1 - val0;
9856 /* If the target has load delay slots, then there's no benefit
9857 to using an ldm instruction unless the offset is zero and
9858 we are optimizing for size. */
9859 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9860 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9861 && (val_diff == 4 || val_diff == -4));
9864 return ((REGNO (reg0) == REGNO (reg1))
9865 && (val_diff == 4 || val_diff == -4));
9871 /* Return true if OP is a valid load or store multiple operation. LOAD is true
9872 for load operations, false for store operations. CONSECUTIVE is true
9873 if the register numbers in the operation must be consecutive in the register
9874 bank. RETURN_PC is true if the value is to be loaded into the PC.
9875 The pattern we are trying to match for load is:
9876 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
9877 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
9880 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
9883 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
9884 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
9885 3. If consecutive is TRUE, then for kth register being loaded,
9886 REGNO (R_dk) = REGNO (R_d0) + k.
9887 The pattern for store is similar. */
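/* Added example: a two-register LDMIA with zero offset, base register
   r4 and destinations r0/r1 matches the pattern above as
     (parallel [(set (reg:SI 0) (mem:SI (reg:SI 4)))
                (set (reg:SI 1) (mem:SI (plus:SI (reg:SI 4)
                                                 (const_int 4))))])  */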
9889 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
9890 bool consecutive, bool return_pc)
9892 HOST_WIDE_INT count = XVECLEN (op, 0);
9895 unsigned first_regno;
9896 HOST_WIDE_INT i = 1, base = 0, offset = 0;
9898 bool addr_reg_in_reglist = false;
9899 bool update = false;
9904 /* If not in SImode, then registers must be consecutive
9905 (e.g., VLDM instructions for DFmode). */
9906 gcc_assert ((mode == SImode) || consecutive);
9907 /* Setting return_pc for stores is illegal. */
9908 gcc_assert (!return_pc || load);
9910 /* Set up the increments and the regs per val based on the mode. */
9911 reg_increment = GET_MODE_SIZE (mode);
9912 regs_per_val = reg_increment / 4;
9913 offset_adj = return_pc ? 1 : 0;
9916 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
9917 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
9920 /* Check if this is a write-back. */
9921 elt = XVECEXP (op, 0, offset_adj);
9922 if (GET_CODE (SET_SRC (elt)) == PLUS)
9928 /* The offset adjustment must be the number of registers being
9929 popped times the size of a single register. */
9930 if (!REG_P (SET_DEST (elt))
9931 || !REG_P (XEXP (SET_SRC (elt), 0))
9932 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
9933 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
9934 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
9935 ((count - 1 - offset_adj) * reg_increment))
9940 base = base + offset_adj;
9941 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
9942 success depends on the type: VLDM can do just one reg,
9943 LDM must do at least two. */
9944 if ((count <= i) && (mode == SImode))
9947 elt = XVECEXP (op, 0, i - 1);
9948 if (GET_CODE (elt) != SET)
9953 reg = SET_DEST (elt);
9954 mem = SET_SRC (elt);
9958 reg = SET_SRC (elt);
9959 mem = SET_DEST (elt);
9962 if (!REG_P (reg) || !MEM_P (mem))
9965 regno = REGNO (reg);
9966 first_regno = regno;
9967 addr = XEXP (mem, 0);
9968 if (GET_CODE (addr) == PLUS)
9970 if (!CONST_INT_P (XEXP (addr, 1)))
9973 offset = INTVAL (XEXP (addr, 1));
9974 addr = XEXP (addr, 0);
9980 /* Don't allow SP to be loaded unless it is also the base register. It
9981 guarantees that SP is reset correctly when an LDM instruction
9982 is interrupted. Otherwise, we might end up with a corrupt stack. */
9983 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
9986 for (; i < count; i++)
9988 elt = XVECEXP (op, 0, i);
9989 if (GET_CODE (elt) != SET)
9994 reg = SET_DEST (elt);
9995 mem = SET_SRC (elt);
9999 reg = SET_SRC (elt);
10000 mem = SET_DEST (elt);
10004 || GET_MODE (reg) != mode
10005 || REGNO (reg) <= regno
10008 (unsigned int) (first_regno + regs_per_val * (i - base))))
10009 /* Don't allow SP to be loaded unless it is also the base register. It
10010 guarantees that SP is reset correctly when an LDM instruction
10011 is interrupted. Otherwise, we might end up with a corrupt stack. */
10012 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10014 || GET_MODE (mem) != mode
10015 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10016 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10017 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10018 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10019 offset + (i - base) * reg_increment))
10020 && (!REG_P (XEXP (mem, 0))
10021 || offset + (i - base) * reg_increment != 0)))
10024 regno = REGNO (reg);
10025 if (regno == REGNO (addr))
10026 addr_reg_in_reglist = true;
10031 if (update && addr_reg_in_reglist)
10034 /* For Thumb-1, the address register is always modified, either by write-back
10035 or by explicit load. If the pattern does not describe an update,
10036 then the address register must be in the list of loaded registers. */
10038 return update || addr_reg_in_reglist;
10044 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10045 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10046 instruction. ADD_OFFSET is nonzero if the base address register needs
10047 to be modified with an add instruction before we can use it. */
10050 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10051 int nops, HOST_WIDE_INT add_offset)
10053 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10054 if the offset isn't small enough. The reason 2 ldrs are faster
10055 is because these ARMs are able to do more than one cache access
10056 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10057 whilst the ARM8 has a double bandwidth cache. This means that
10058 these cores can do both an instruction fetch and a data fetch in
10059 a single cycle, so the trick of calculating the address into a
10060 scratch register (one of the result regs) and then doing a load
10061 multiple actually becomes slower (and no smaller in code size).
10062 That is the transformation
10064 ldr rd1, [rbase + offset]
10065 ldr rd2, [rbase + offset + 4]
10069 add rd1, rbase, offset
10070 ldmia rd1, {rd1, rd2}
10072 produces worse code -- '3 cycles + any stalls on rd2' instead of
10073 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10074 access per cycle, the first sequence could never complete in less
10075 than 6 cycles, whereas the ldm sequence would only take 5 and
10076 would make better use of sequential accesses if not hitting the
10079 We cheat here and test 'arm_ld_sched' which we currently know to
10080 only be true for the ARM8, ARM9 and StrongARM. If this ever
10081 changes, then the test below needs to be reworked. */
10082 if (nops == 2 && arm_ld_sched && add_offset != 0)
10085 /* XScale has load-store double instructions, but they have stricter
10086 alignment requirements than load-store multiple, so we cannot use them.
10089 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10090 the pipeline until completion.

NREGS CYCLES
1 3
2 4
3 5
4 6

10098 An ldr instruction takes 1-3 cycles, but does not block the pipeline.

NREGS CYCLES
1 1-3
2 2-6
3 3-9
4 4-12
10107 Best case ldr will always win. However, the more ldr instructions
10108 we issue, the less likely we are to be able to schedule them well.
10109 Using ldr instructions also increases code size.
10111 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10112 for counts of 3 or 4 regs. */
10113 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10118 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10119 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10120 an array ORDER which describes the sequence to use when accessing the
10121 offsets that produces an ascending order. In this sequence, each
10122 offset must be larger by exactly 4 than the previous one. ORDER[0]
10123 must have been filled in with the lowest offset by the caller.
10124 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10125 we use to verify that ORDER produces an ascending order of registers.
10126 Return true if it was possible to construct such an order, false if not. */
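/* Added example: with NOPS = 4 and UNSORTED_OFFSETS = {8, 0, 4, 12},
   the caller sets ORDER[0] = 1 (the lowest offset); the loop below then
   fills in ORDER = {1, 2, 0, 3}, visiting offsets 0, 4, 8, 12.  */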
10130 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10131 int *unsorted_regs)
10134 for (i = 1; i < nops; i++)
10138 order[i] = order[i - 1];
10139 for (j = 0; j < nops; j++)
10140 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10142 /* We must find exactly one offset that is higher than the
10143 previous one by 4. */
10144 if (order[i] != order[i - 1])
10148 if (order[i] == order[i - 1])
10150 /* The register numbers must be ascending. */
10151 if (unsorted_regs != NULL
10152 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10158 /* Used to determine in a peephole whether a sequence of load
10159 instructions can be changed into a load-multiple instruction.
10160 NOPS is the number of separate load instructions we are examining. The
10161 first NOPS entries in OPERANDS are the destination registers, the
10162 next NOPS entries are memory operands. If this function is
10163 successful, *BASE is set to the common base register of the memory
10164 accesses; *LOAD_OFFSET is set to the first memory location's offset
10165 from that base register.
10166 REGS is an array filled in with the destination register numbers.
10167 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10168 insn numbers to an ascending order of loads. If CHECK_REGS is true,
10169 the sequence of registers in REGS matches the loads from ascending memory
10170 locations, and the function verifies that the register numbers are
10171 themselves ascending. If CHECK_REGS is false, the register numbers
10172 are stored in the order they are found in the operands. */
10174 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10175 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10177 int unsorted_regs[MAX_LDM_STM_OPS];
10178 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10179 int order[MAX_LDM_STM_OPS];
10180 rtx base_reg_rtx = NULL;
10184 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10185 easily extended if required. */
10186 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10188 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10190 /* Loop over the operands and check that the memory references are
10191 suitable (i.e. immediate offsets from the same base register). At
10192 the same time, extract the target register, and the memory offsets. */
10194 for (i = 0; i < nops; i++)
10199 /* Convert a subreg of a mem into the mem itself. */
10200 if (GET_CODE (operands[nops + i]) == SUBREG)
10201 operands[nops + i] = alter_subreg (operands + (nops + i));
10203 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10205 /* Don't reorder volatile memory references; it doesn't seem worth
10206 looking for the case where the order is ok anyway. */
10207 if (MEM_VOLATILE_P (operands[nops + i]))
10210 offset = const0_rtx;
10212 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10213 || (GET_CODE (reg) == SUBREG
10214 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10215 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10216 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10218 || (GET_CODE (reg) == SUBREG
10219 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10220 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10225 base_reg = REGNO (reg);
10226 base_reg_rtx = reg;
10227 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10230 else if (base_reg != (int) REGNO (reg))
10231 /* Not addressed from the same base register. */
10234 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10235 ? REGNO (operands[i])
10236 : REGNO (SUBREG_REG (operands[i])));
10238 /* If it isn't an integer register, or if it overwrites the
10239 base register but isn't the last insn in the list, then
10240 we can't do this. */
10241 if (unsorted_regs[i] < 0
10242 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10243 || unsorted_regs[i] > 14
10244 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10247 unsorted_offsets[i] = INTVAL (offset);
10248 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10252 /* Not a suitable memory address. */
10256 /* All the useful information has now been extracted from the
10257 operands into unsorted_regs and unsorted_offsets; additionally,
10258 order[0] has been set to the lowest offset in the list. Sort
10259 the offsets into order, verifying that they are adjacent, and
10260 check that the register numbers are ascending. */
10261 if (!compute_offset_order (nops, unsorted_offsets, order,
10262 check_regs ? unsorted_regs : NULL))
10266 memcpy (saved_order, order, sizeof order);
10272 for (i = 0; i < nops; i++)
10273 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10275 *load_offset = unsorted_offsets[order[0]];
10279 && !peep2_reg_dead_p (nops, base_reg_rtx))
10282 if (unsorted_offsets[order[0]] == 0)
10283 ldm_case = 1; /* ldmia */
10284 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10285 ldm_case = 2; /* ldmib */
10286 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10287 ldm_case = 3; /* ldmda */
10288 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10289 ldm_case = 4; /* ldmdb */
10290 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10291 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10296 if (!multiple_operation_profitable_p (false, nops,
10298 ? unsorted_offsets[order[0]] : 0))
10304 /* Used to determine in a peephole whether a sequence of store instructions can
10305 be changed into a store-multiple instruction.
10306 NOPS is the number of separate store instructions we are examining.
10307 NOPS_TOTAL is the total number of instructions recognized by the peephole
10309 The first NOPS entries in OPERANDS are the source registers, the next
10310 NOPS entries are memory operands. If this function is successful, *BASE is
10311 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10312 to the first memory location's offset from that base register. REGS is an
10313 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10314 likewise filled with the corresponding rtx's.
10315 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10316 numbers to an ascending order of stores.
10317 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10318 from ascending memory locations, and the function verifies that the register
10319 numbers are themselves ascending. If CHECK_REGS is false, the register
10320 numbers are stored in the order they are found in the operands. */
10322 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10323 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10324 HOST_WIDE_INT *load_offset, bool check_regs)
10326 int unsorted_regs[MAX_LDM_STM_OPS];
10327 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10328 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10329 int order[MAX_LDM_STM_OPS];
10331 rtx base_reg_rtx = NULL;
10334 /* Write back of base register is currently only supported for Thumb 1. */
10335 int base_writeback = TARGET_THUMB1;
10337 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10338 easily extended if required. */
10339 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10341 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10343 /* Loop over the operands and check that the memory references are
10344 suitable (i.e. immediate offsets from the same base register). At
10345 the same time, extract the target register, and the memory offsets. */
10347 for (i = 0; i < nops; i++)
10352 /* Convert a subreg of a mem into the mem itself. */
10353 if (GET_CODE (operands[nops + i]) == SUBREG)
10354 operands[nops + i] = alter_subreg (operands + (nops + i));
10356 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10358 /* Don't reorder volatile memory references; it doesn't seem worth
10359 looking for the case where the order is ok anyway. */
10360 if (MEM_VOLATILE_P (operands[nops + i]))
10363 offset = const0_rtx;
10365 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10366 || (GET_CODE (reg) == SUBREG
10367 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10368 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10369 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10371 || (GET_CODE (reg) == SUBREG
10372 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10373 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10376 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10377 ? operands[i] : SUBREG_REG (operands[i]));
10378 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10382 base_reg = REGNO (reg);
10383 base_reg_rtx = reg;
10384 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10387 else if (base_reg != (int) REGNO (reg))
10388 /* Not addressed from the same base register. */
10391 /* If it isn't an integer register, then we can't do this. */
10392 if (unsorted_regs[i] < 0
10393 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10394 /* The effects are unpredictable if the base register is
10395 both updated and stored. */
10396 || (base_writeback && unsorted_regs[i] == base_reg)
10397 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10398 || unsorted_regs[i] > 14)
10401 unsorted_offsets[i] = INTVAL (offset);
10402 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10406 /* Not a suitable memory address. */
10410 /* All the useful information has now been extracted from the
10411 operands into unsorted_regs and unsorted_offsets; additionally,
10412 order[0] has been set to the lowest offset in the list. Sort
10413 the offsets into order, verifying that they are adjacent, and
10414 check that the register numbers are ascending. */
10415 if (!compute_offset_order (nops, unsorted_offsets, order,
10416 check_regs ? unsorted_regs : NULL))
10420 memcpy (saved_order, order, sizeof order);
10426 for (i = 0; i < nops; i++)
10428 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10430 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10433 *load_offset = unsorted_offsets[order[0]];
10437 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10440 if (unsorted_offsets[order[0]] == 0)
10441 stm_case = 1; /* stmia */
10442 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10443 stm_case = 2; /* stmib */
10444 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10445 stm_case = 3; /* stmda */
10446 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10447 stm_case = 4; /* stmdb */
10451 if (!multiple_operation_profitable_p (false, nops, 0))
10457 /* Routines for use in generating RTL. */
10459 /* Generate a load-multiple instruction. COUNT is the number of loads in
10460 the instruction; REGS and MEMS are arrays containing the operands.
10461 BASEREG is the base register to be used in addressing the memory operands.
10462 WBACK_OFFSET is nonzero if the instruction should update the base register.
10466 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10467 HOST_WIDE_INT wback_offset)
10472 if (!multiple_operation_profitable_p (false, count, 0))
10478 for (i = 0; i < count; i++)
10479 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10481 if (wback_offset != 0)
10482 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10484 seq = get_insns ();
10490 result = gen_rtx_PARALLEL (VOIDmode,
10491 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10492 if (wback_offset != 0)
10494 XVECEXP (result, 0, 0)
10495 = gen_rtx_SET (VOIDmode, basereg,
10496 plus_constant (Pmode, basereg, wback_offset));
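/* Added note: I is deliberately not re-initialized in the loop below;
   when a write-back set occupies slot 0 of the PARALLEL, the register
   sets start at element 1, while J always indexes REGS/MEMS from 0.  */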
10501 for (j = 0; i < count; i++, j++)
10502 XVECEXP (result, 0, i)
10503 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10508 /* Generate a store-multiple instruction. COUNT is the number of stores in
10509 the instruction; REGS and MEMS are arrays containing the operands.
10510 BASEREG is the base register to be used in addressing the memory operands.
10511 WBACK_OFFSET is nonzero if the instruction should update the base register.
10515 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10516 HOST_WIDE_INT wback_offset)
10521 if (GET_CODE (basereg) == PLUS)
10522 basereg = XEXP (basereg, 0);
10524 if (!multiple_operation_profitable_p (false, count, 0))
10530 for (i = 0; i < count; i++)
10531 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10533 if (wback_offset != 0)
10534 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10536 seq = get_insns ();
10542 result = gen_rtx_PARALLEL (VOIDmode,
10543 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10544 if (wback_offset != 0)
10546 XVECEXP (result, 0, 0)
10547 = gen_rtx_SET (VOIDmode, basereg,
10548 plus_constant (Pmode, basereg, wback_offset));
10553 for (j = 0; i < count; i++, j++)
10554 XVECEXP (result, 0, i)
10555 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10560 /* Generate either a load-multiple or a store-multiple instruction. This
10561 function can be used in situations where we can start with a single MEM
10562 rtx and adjust its address upwards.
10563 COUNT is the number of operations in the instruction, not counting a
10564 possible update of the base register. REGS is an array containing the register numbers to be used in the instruction.
10566 BASEREG is the base register to be used in addressing the memory operands,
10567 which are constructed from BASEMEM.
10568 WRITE_BACK specifies whether the generated instruction should include an
10569 update of the base register.
10570 OFFSETP is used to pass an offset to and from this function; this offset
10571 is not used when constructing the address (instead BASEMEM should have an
10572 appropriate offset in its address), it is used only for setting
10573 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10576 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10577 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10579 rtx mems[MAX_LDM_STM_OPS];
10580 HOST_WIDE_INT offset = *offsetp;
10583 gcc_assert (count <= MAX_LDM_STM_OPS);
10585 if (GET_CODE (basereg) == PLUS)
10586 basereg = XEXP (basereg, 0);
10588 for (i = 0; i < count; i++)
10590 rtx addr = plus_constant (Pmode, basereg, i * 4);
10591 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10599 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10600 write_back ? 4 * count : 0);
10602 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10603 write_back ? 4 * count : 0);
10607 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10608 rtx basemem, HOST_WIDE_INT *offsetp)
10610 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10615 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10616 rtx basemem, HOST_WIDE_INT *offsetp)
10618 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10622 /* Called from a peephole2 expander to turn a sequence of loads into an
10623 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10624 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10625 is true if we can reorder the registers because they are used commutatively subsequently.
10627 Returns true iff we could generate a new instruction. */
10630 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10632 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10633 rtx mems[MAX_LDM_STM_OPS];
10634 int i, j, base_reg;
10636 HOST_WIDE_INT offset;
10637 int write_back = FALSE;
10641 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10642 &base_reg, &offset, !sort_regs);
10648 for (i = 0; i < nops - 1; i++)
10649 for (j = i + 1; j < nops; j++)
10650 if (regs[i] > regs[j])
10656 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10660 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10661 gcc_assert (ldm_case == 1 || ldm_case == 5);
10667 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10668 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10670 if (!TARGET_THUMB1)
10672 base_reg = regs[0];
10673 base_reg_rtx = newbase;
10677 for (i = 0; i < nops; i++)
10679 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10680 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10683 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10684 write_back ? offset + i * 4 : 0));
10688 /* Called from a peephole2 expander to turn a sequence of stores into an
10689 STM instruction. OPERANDS are the operands found by the peephole matcher;
10690 NOPS indicates how many separate stores we are trying to combine.
10691 Returns true iff we could generate a new instruction. */
10694 gen_stm_seq (rtx *operands, int nops)
10697 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10698 rtx mems[MAX_LDM_STM_OPS];
10701 HOST_WIDE_INT offset;
10702 int write_back = FALSE;
10705 bool base_reg_dies;
10707 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10708 mem_order, &base_reg, &offset, true);
10713 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10715 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10718 gcc_assert (base_reg_dies);
10724 gcc_assert (base_reg_dies);
10725 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10729 addr = plus_constant (Pmode, base_reg_rtx, offset);
10731 for (i = 0; i < nops; i++)
10733 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10734 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10737 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10738 write_back ? offset + i * 4 : 0));
10742 /* Called from a peephole2 expander to turn a sequence of stores that are
10743 preceded by constant loads into an STM instruction. OPERANDS are the
10744 operands found by the peephole matcher; NOPS indicates how many
10745 separate stores we are trying to combine; there are 2 * NOPS
10746 instructions in the peephole.
10747 Returns true iff we could generate a new instruction. */
10750 gen_const_stm_seq (rtx *operands, int nops)
10752 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10753 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10754 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10755 rtx mems[MAX_LDM_STM_OPS];
10758 HOST_WIDE_INT offset;
10759 int write_back = FALSE;
10762 bool base_reg_dies;
10764 HARD_REG_SET allocated;
10766 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10767 mem_order, &base_reg, &offset, false);
10772 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10774 /* If the same register is used more than once, try to find a free register. */
10776 CLEAR_HARD_REG_SET (allocated);
10777 for (i = 0; i < nops; i++)
10779 for (j = i + 1; j < nops; j++)
10780 if (regs[i] == regs[j])
10782 rtx t = peep2_find_free_register (0, nops * 2,
10783 TARGET_THUMB1 ? "l" : "r",
10784 SImode, &allocated);
10788 regs[i] = REGNO (t);
10792 /* Compute an ordering that maps the register numbers to an ascending sequence. */
10795 for (i = 0; i < nops; i++)
10796 if (regs[i] < regs[reg_order[0]])
10799 for (i = 1; i < nops; i++)
10801 int this_order = reg_order[i - 1];
10802 for (j = 0; j < nops; j++)
10803 if (regs[j] > regs[reg_order[i - 1]]
10804 && (this_order == reg_order[i - 1]
10805 || regs[j] < regs[this_order]))
10807 reg_order[i] = this_order;
10810 /* Ensure that registers that must be live after the instruction end
10811 up with the correct value. */
10812 for (i = 0; i < nops; i++)
10814 int this_order = reg_order[i];
10815 if ((this_order != mem_order[i]
10816 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10817 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10821 /* Load the constants. */
10822 for (i = 0; i < nops; i++)
10824 rtx op = operands[2 * nops + mem_order[i]];
10825 sorted_regs[i] = regs[reg_order[i]];
10826 emit_move_insn (reg_rtxs[reg_order[i]], op);
10829 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10831 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10834 gcc_assert (base_reg_dies);
10840 gcc_assert (base_reg_dies);
10841 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10845 addr = plus_constant (Pmode, base_reg_rtx, offset);
10847 for (i = 0; i < nops; i++)
10849 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10850 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10853 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10854 write_back ? offset + i * 4 : 0));
10858 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10859 unaligned copies on processors which support unaligned semantics for those
10860 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10861 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10862 An interleave factor of 1 (the minimum) will perform no interleaving.
10863 Load/store multiple are used for aligned addresses where possible. */
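/* Added example: with INTERLEAVE_FACTOR 1, a 7-byte copy is emitted as
   one word load/store, then one halfword load/store, then one byte
   load/store.  */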
10866 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10867 HOST_WIDE_INT length,
10868 unsigned int interleave_factor)
10870 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10871 int *regnos = XALLOCAVEC (int, interleave_factor);
10872 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10873 HOST_WIDE_INT i, j;
10874 HOST_WIDE_INT remaining = length, words;
10875 rtx halfword_tmp = NULL, byte_tmp = NULL;
10877 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10878 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10879 HOST_WIDE_INT srcoffset, dstoffset;
10880 HOST_WIDE_INT src_autoinc, dst_autoinc;
10883 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10885 /* Use hard registers if we have aligned source or destination so we can use
10886 load/store multiple with contiguous registers. */
10887 if (dst_aligned || src_aligned)
10888 for (i = 0; i < interleave_factor; i++)
10889 regs[i] = gen_rtx_REG (SImode, i);
10891 for (i = 0; i < interleave_factor; i++)
10892 regs[i] = gen_reg_rtx (SImode);
10894 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10895 src = copy_addr_to_reg (XEXP (srcbase, 0));
10897 srcoffset = dstoffset = 0;
10899 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10900 For copying the last bytes we want to subtract this offset again. */
10901 src_autoinc = dst_autoinc = 0;
10903 for (i = 0; i < interleave_factor; i++)
  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      if (src_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
                                            TRUE, srcbase, &srcoffset));
          src_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
                                                 - src_autoinc));
              mem = adjust_automodify_address (srcbase, SImode, addr,
                                               srcoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
            }
          srcoffset += block_size_bytes;
        }

      if (dst_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
                                             TRUE, dstbase, &dstoffset));
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
                                                 - dst_autoinc));
              mem = adjust_automodify_address (dstbase, SImode, addr,
                                               dstoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
            }
          dstoffset += block_size_bytes;
        }

      remaining -= block_size_bytes;
    }
  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
                                        &srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, src,
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
          mem = adjust_automodify_address (srcbase, SImode, addr,
                                           srcoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_loadsi (regs[j], mem));
        }
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
                                         &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, dst,
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
          mem = adjust_automodify_address (dstbase, SImode, addr,
                                           dstoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_storesi (mem, regs[j]));
        }
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);
  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
         byte, depending on interleave factor.  */
      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
          emit_insn (gen_unaligned_storehi (mem,
                       gen_lowpart (HImode, halfword_tmp)));
          halfword_tmp = NULL;
          dstoffset += 2;
        }
      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
          byte_tmp = NULL;
          dstoffset++;
        }
      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
                   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
                      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */
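/* Editorial sketch of the generated control flow (not part of the
   original sources):

        loop:
          copy BYTES_PER_ITER bytes            ; straight-line block copy
          src_reg  += BYTES_PER_ITER
          dest_reg += BYTES_PER_ITER
          if (src_reg != final_src) goto loop
        copy the LENGTH % BYTES_PER_ITER left-over bytes  */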
static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
                               unsigned int interleave_factor,
                               HOST_WIDE_INT bytes_per_iter)
{
  rtx label, src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
                                   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
                                     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
         or dst_aligned though: allow more interleaving in those cases since the
         resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
                                       interleave_factor, bytes_per_iter);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
                                           interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
         subject to loop unrolling, which makes tuning this condition a little
         tricky.  */
      if (length > 32)
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (GET_CODE (operands[2]) != CONST_INT
      || GET_CODE (operands[3]) != CONST_INT
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i += 4)
    {
      if (in_words_to_go > 4)
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                          TRUE, srcbase, &srcoffset));
      else
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
                                          src, FALSE, srcbase,
                                          &srcoffset));

      if (out_words_to_go)
        {
          if (out_words_to_go > 4)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
                                               TRUE, dstbase, &dstoffset));
          else if (out_words_to_go != 1)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
                                               out_words_to_go, dst,
                                               (last_bytes == 0
                                                ? FALSE : TRUE),
                                               dstbase, &dstoffset));
          else
            {
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
              emit_move_insn (mem, gen_rtx_REG (SImode, 0));
              if (last_bytes != 0)
                {
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
                  dstoffset += 4;
                }
            }
        }

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);  /* Sanity check.  */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
                              GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode,
                                           plus_constant (Pmode, dst,
                                                          last_bytes - 1),
                                           dstoffset + last_bytes - 1);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

          if (--last_bytes)
            {
              tmp = gen_reg_rtx (SImode);
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
              part_bytes_reg = tmp;
            }
        }
    }
  else
    {
      if (last_bytes > 1)
        {
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
          last_bytes -= 2;
          if (last_bytes)
            {
              rtx tmp = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
              part_bytes_reg = tmp;
              dstoffset += 2;
            }
        }

      if (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
        }
    }

  return 1;
}
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
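/* Editorial example (not part of the original sources): a source test
   such as

        if (x == 0 && y == 0)

   reaches this function as (and (eq x 0) (eq y 0)) with
   COND_OR == DOM_CC_X_AND_Y.  Both sub-comparisons are EQ, so CC_DEQmode
   is selected and the test can be emitted as the conditional-compare pair

        cmp     x, #0
        cmpeq   y, #0

   followed by a single conditional branch on the combined result.  */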
static enum machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    {
      enum rtx_code temp = cond1;
      cond1 = cond2;
      cond2 = temp;
    }

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;

      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTmode;

      switch (cond2)
        {
        case LT: return CC_DLTmode;
        case LE: return CC_DLEmode;
        case NE: return CC_DNEmode;
        default: gcc_unreachable ();
        }

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTmode;

      switch (cond2)
        {
        case GT: return CC_DGTmode;
        case GE: return CC_DGEmode;
        case NE: return CC_DNEmode;
        default: gcc_unreachable ();
        }

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTUmode;

      switch (cond2)
        {
        case LTU: return CC_DLTUmode;
        case LEU: return CC_DLEUmode;
        default: gcc_unreachable ();
        }

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTUmode;

      switch (cond2)
        {
        case GTU: return CC_DGTUmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
enum machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
          || op == GEU || op == GTU || op == LTU || op == LEU)
      && GET_CODE (y) == CONST_INT)
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
          || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
        {
        case EQ:
        case NE:
          /* A DImode comparison against zero can be implemented by
             or'ing the two halves together.  */
          if (y == const0_rtx)
            return CC_Zmode;

          /* We can do an equality test in three Thumb instructions.  */
          if (!TARGET_32BIT)
            return CC_Zmode;

          /* Fall through.  */

        case LTU:
        case LEU:
        case GTU:
        case GEU:
          /* DImode unsigned comparisons can be implemented by cmp +
             cmpeq without a scratch register.  Not worth doing in
             Thumb-2.  */
          if (TARGET_32BIT)
            return CC_CZmode;

          /* Fall through.  */

        case LT:
        case LE:
        case GT:
        case GE:
          /* DImode signed and unsigned comparisons can be implemented
             by cmp + sbcs with a scratch register, but that does not
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
          gcc_assert (op != EQ && op != NE);
          return CC_NCVmode;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */

static rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  enum machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
         then compare against zero.  Not used for ARM mode; there
         CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
        {
          gcc_assert (!reload_completed);
          x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
          y = const0_rtx;
        }

      /* A scratch register is required.  */
      if (reload_completed)
        gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
        scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (GET_CODE (ref) == REG)
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095.  */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT
                                (SImode,
                                 gen_rtx_SUBREG (SImode, operands[0], 0),
                                 GEN_INT (8)),
                                scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT (SImode, scratch,
                                                GEN_INT (8)),
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
}
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
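/* Editorial illustration (not part of the original sources): on a
   little-endian target the synthesized store of halfword VALUE to
   [BASE, #OFFSET] amounts to

        strb    VALUE, [BASE, #OFFSET]
        lsr     SCRATCH, VALUE, #8
        strb    SCRATCH, [BASE, #OFFSET+1]

   which is why a spare register is always required.  */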
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (GET_CODE (ref) == REG)
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
        {
          /* Updating base_plus might destroy outval, see if we can
             swap the scratch and base_plus.  */
          if (!reg_overlap_mentioned_p (scratch, outval))
            {
              rtx tmp = scratch;
              scratch = base_plus;
              base_plus = tmp;
            }
          else
            {
              rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

              /* Be conservative and copy OUTVAL into the scratch now,
                 this should only be necessary if outval is a subreg
                 of something larger than a word.  */
              /* XXX Might this clobber base?  I can't see how it can,
                 since scratch is known to overlap with OUTVAL, and
                 must be wider than a word.  */
              emit_insn (gen_movhi (scratch_hi, outval));
              outval = scratch_hi;
            }
        }

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095.  */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Be careful not to destroy OUTVAL.  */
          if (reg_overlap_mentioned_p (base_plus, outval))
            {
              /* Updating base_plus might destroy outval, see if we
                 can swap the scratch and base_plus.  */
              if (!reg_overlap_mentioned_p (scratch, outval))
                {
                  rtx tmp = scratch;
                  scratch = base_plus;
                  base_plus = tmp;
                }
              else
                {
                  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

                  /* Be conservative and copy outval into scratch now,
                     this should only be necessary if outval is a
                     subreg of something larger than a word.  */
                  /* XXX Might this clobber base?  I can't see how it
                     can, since scratch is known to overlap with
                     OUTVAL.  */
                  emit_insn (gen_movhi (scratch_hi, outval));
                  outval = scratch_hi;
                }
            }

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */

static bool
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
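/* Editorial example (not part of the original sources): under AAPCS a
   stack argument of type "struct { char c; }" occupies a full word slot;
   because small aggregates go at the lowest address, the byte lands at the
   bottom of the slot and the padding sits above it, so the argument is
   padded upwards regardless of endianness.  */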
bool
arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}


/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
arm_pad_reg_upward (enum machine_mode mode,
                    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
         and small complex types are always padded upwards.  */
      if (type)
        {
          if ((AGGREGATE_TYPE_P (type)
               || TREE_CODE (type) == COMPLEX_TYPE
               || FIXED_POINT_TYPE_P (type))
              && int_size_in_bytes (type) <= 4)
            return true;
        }
      else
        {
          if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
              && GET_MODE_SIZE (mode) <= 4)
            return true;
        }
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;
    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;
    case CONST_VECTOR:
      {
        int i;

        fprintf (f, "<");
        for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
          {
            fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
            if (i < (CONST_VECTOR_NUNITS (x) - 1))
              fputc (',', f);
          }
        fprintf (f, ">");
      }
      return;
    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;
    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;
    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;
    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;
    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;
    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

        ldr     rn, L1
        ...
        b       L2
        align
        L1:     .long value
        L2:
        ...

        ldr     rn, L3
        ...
        b       L4
        align
        L3:     .long value
        L4:
        ...
   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
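/* Editorial illustration (not part of the original sources): an emitted
   minipool is simply data dropped into the instruction stream, e.g.

        ldr     r0, .LCP0       @ pc-relative load of the constant
        ...
        b       .Lskip          @ forced branch around the pool
        .align  2
   .LCP0:
        .word   0x12345678
   .Lskip:

   The code below decides where such pools may be placed so that every
   referencing load remains within its pc-relative addressing range.  */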
/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8 bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx               insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  enum machine_mode mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
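/* Editorial example (not part of the original sources): an HImode constant
   occupies two bytes, but MINIPOOL_FIX_SIZE (HImode) evaluates to 4, so
   every pool offset stays word-aligned, while an 8-byte DImode constant
   contributes its full size.  */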
static Mnode *  minipool_vector_head;
static Mnode *  minipool_vector_tail;
static rtx      minipool_vector_label;
static int      minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *          minipool_fix_head;
Mfix *          minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *          minipool_barrier;
/* Determines if INSN is the start of a jump table.  Returns the end
   of the TABLE or NULL_RTX.  */
static rtx
is_jump_table (rtx insn)
{
  rtx table;

  if (jump_to_label_p (insn)
      && ((table = next_real_insn (JUMP_LABEL (insn)))
          == next_real_insn (insn))
      && table != NULL
      && GET_CODE (table) == JUMP_INSN
      && (GET_CODE (PATTERN (table)) == ADDR_VEC
          || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
    return table;

  return NULL_RTX;
}

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
        {
        case 1:
          /* Round up size of TBB table to a halfword boundary.  */
          size = (size + 1) & ~(HOST_WIDE_INT)1;
          break;
        case 2:
          /* No padding necessary for TBH.  */
          break;
        case 4:
          /* Add two bytes for alignment on Thumb.  */
          size += 2;
          break;
        default:
          gcc_unreachable ();
        }
      return size;
    }

  return 0;
}

/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */

static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
                               HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
        mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
         mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
        mp->next->prev = mp->prev;
      else
        minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (GET_CODE (fix->value) != CODE_LABEL
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value))
        {
          /* More than one fix references this entry.  */
          mp->refcount++;
          return move_minipool_fix_forward_ref (mp, max_mp, max_address);
        }

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
          && mp->max_address > max_address)
        max_mp = mp;

      /* If we are inserting an 8-byte aligned quantity and
         we have not already found an insertion point, then
         make sure that all such 8-byte aligned quantities are
         placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
          && max_mp == NULL
          && fix->fix_size >= 8
          && mp->fix_size < 8)
        {
          max_mp = mp;
          max_address = mp->max_address;
        }
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
        {
          minipool_vector_head = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
                                HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
        mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
         mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
        mp->prev->next = mp->next;
      else
        minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
        offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
          >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (GET_CODE (fix->value) != CODE_LABEL
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value)
          /* Check that there is enough slack to move this entry to the
             end of the table (this is conservative).  */
          && (mp->max_address
              > (minipool_barrier->address
                 + minipool_vector_tail->offset
                 + minipool_vector_tail->fix_size)))
        {
          mp->refcount++;
          return move_minipool_fix_backward_ref (mp, min_mp, min_address);
        }

      if (min_mp != NULL)
        mp->min_address += fix->fix_size;
      else
        {
          /* Note the insertion point if necessary.  */
          if (mp->min_address < min_address)
            {
              /* For now, we do not allow the insertion of 8-byte alignment
                 requiring nodes anywhere but at the start of the pool.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                min_mp = mp;
            }
          else if (mp->max_address
                   < minipool_barrier->address + mp->offset + fix->fix_size)
            {
              /* Inserting before this entry would push the fix beyond
                 its maximum address (which can happen if we have
                 re-located a forwards fix); force the new fix to come
                 after it.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                {
                  min_mp = mp;
                  min_address = mp->min_address + fix->fix_size;
                }
            }
          /* Do not insert a non-8-byte aligned quantity before 8-byte
             aligned quantities.  */
          else if (ARM_DOUBLEWORD_ALIGN
                   && fix->fix_size < 8
                   && mp->fix_size >= 8)
            {
              min_mp = mp;
              min_address = mp->min_address + fix->fix_size;
            }
        }
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
        {
          minipool_vector_tail = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
        mp->next->offset = mp->offset + mp->fix_size;
      else
        mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
        offset += mp->fix_size;
    }
}
/* Output the literal table.  */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
        {
          align64 = 1;
          break;
        }

  if (dump_file)
    fprintf (dump_file,
             ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
             INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
        {
          if (dump_file)
            {
              fprintf (dump_file,
                       ";; Offset %u, min %ld, max %ld ",
                       (unsigned) mp->offset, (unsigned long) mp->min_address,
                       (unsigned long) mp->max_address);
              arm_print_value (dump_file, mp->value);
              fputc ('\n', dump_file);
            }

          switch (mp->fix_size)
            {
#ifdef HAVE_consttable_1
            case 1:
              scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_2
            case 2:
              scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_4
            case 4:
              scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_8
            case 8:
              scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_16
            case 16:
              scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
              break;
#endif
            default:
              gcc_unreachable ();
            }
        }

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt at this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
         than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
         within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);

      /* Count the length of this insn.  This must stay in sync with the
         code that pushes minipool fixes.  */
      if (LABEL_P (from))
        count += get_label_padding (from);
      else
        count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      tmp = is_jump_table (from);
      if (tmp != NULL)
        {
          count += get_jump_table_size (tmp);

          /* Jump tables aren't in a basic block, so base the cost on
             the dispatch insn.  If we select this location, we will
             still put the pool after the table.  */
          new_cost = arm_barrier_cost (from);

          if (count < max_count
              && (!selected || new_cost <= selected_cost))
            {
              selected = tmp;
              selected_cost = new_cost;
              selected_address = fix->address + count;
            }

          /* Continue after the dispatch table.  */
          from = NEXT_INSN (tmp);
          continue;
        }

      new_cost = arm_barrier_cost (from);

      if (count < max_count
          && (!selected || new_cost <= selected_cost))
        {
          selected = from;
          selected_cost = new_cost;
          selected_address = fix->address + count;
        }

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
          && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
        selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
                   enum machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
               ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
               GET_MODE_NAME (mode),
               INSN_UID (insn), (unsigned long) address,
               -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (lowpart) == CONST_INT);
  gcc_assert (GET_CODE (highpart) == CONST_INT);

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
                            NULL_RTX, NULL_RTX, 0, 0)
          + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
                              NULL_RTX, NULL_RTX, 0, 0));
}
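/* Editorial example (not part of the original sources): for the constant
   0x0000000100000001 each 32-bit half is 1, a valid immediate, so the
   cost is 1 + 1 = 2 insns; a constant whose halves each need a mov/orr
   pair would cost 4 instead.  */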
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
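/* Editorial note (not part of the original sources): in ARM state an
   immediate is valid when it is an 8-bit value rotated right by an even
   amount, so 0x000000ff000000ff qualifies (each half is 0xff) whereas
   0x0000010100000101 does not (0x101 spans nine bits and needs two
   data-processing insns per half).  */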
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
        continue;

      /* If this alternative is a memory reference, then any mention
         of constants in this alternative is really to fool reload
         into allowing us to accept one there.  We need to fix them up
         now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)
        {
          rtx op = recog_data.operand[opno];

          if (CONSTANT_P (op))
            {
              if (do_pushes)
                push_minipool_fix (insn, address, recog_data.operand_loc[opno],
                                   recog_data.operand_mode[opno], op);
            }
          else if (GET_CODE (op) == MEM
                   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
                   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
            {
              if (do_pushes)
                {
                  rtx cop = avoid_constant_pool_reference (op);

                  /* Casting the address of something to a mode narrower
                     than a word can cause avoid_constant_pool_reference()
                     to return the pool reference itself.  That's no good to
                     us here.  Let's just hope that we can use the
                     constant pool value directly.  */
                  if (op == cop)
                    cop = get_pool_constant (XEXP (op, 0));

                  push_minipool_fix (insn, address,
                                     recog_data.operand_loc[opno],
                                     recog_data.operand_mode[opno], cop);
                }
            }
        }
    }

  return;
}
13176 /* Convert instructions to their cc-clobbering variant if possible, since
13177 that allows us to use smaller encodings. */
13180 thumb2_reorg (void)
13185 INIT_REG_SET (&live);
13187 /* We are freeing block_for_insn in the toplev to keep compatibility
13188 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13189 compute_bb_for_insn ();
13196 COPY_REG_SET (&live, DF_LR_OUT (bb));
13197 df_simulate_initialize_backwards (bb, &live);
13198 FOR_BB_INSNS_REVERSE (bb, insn)
13200 if (NONJUMP_INSN_P (insn)
13201 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13202 && GET_CODE (PATTERN (insn)) == SET)
13204 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13205 rtx pat = PATTERN (insn);
13206 rtx dst = XEXP (pat, 0);
13207 rtx src = XEXP (pat, 1);
13208 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13210 if (!OBJECT_P (src))
13211 op0 = XEXP (src, 0);
13213 if (BINARY_P (src))
13214 op1 = XEXP (src, 1);
13216 if (low_register_operand (dst, SImode))
13218 switch (GET_CODE (src))
13221 if (low_register_operand (op0, SImode))
13223 /* ADDS <Rd>,<Rn>,<Rm> */
13224 if (low_register_operand (op1, SImode))
13226 /* ADDS <Rdn>,#<imm8> */
13227 /* SUBS <Rdn>,#<imm8> */
13228 else if (rtx_equal_p (dst, op0)
13229 && CONST_INT_P (op1)
13230 && IN_RANGE (INTVAL (op1), -255, 255))
13232 /* ADDS <Rd>,<Rn>,#<imm3> */
13233 /* SUBS <Rd>,<Rn>,#<imm3> */
13234 else if (CONST_INT_P (op1)
13235 && IN_RANGE (INTVAL (op1), -7, 7))
13241 /* RSBS <Rd>,<Rn>,#0
13242 Not handled here: see NEG below. */
13243 /* SUBS <Rd>,<Rn>,#<imm3>
13245 Not handled here: see PLUS above. */
13246 /* SUBS <Rd>,<Rn>,<Rm> */
13247 if (low_register_operand (op0, SImode)
13248 && low_register_operand (op1, SImode))
13253 /* MULS <Rdm>,<Rn>,<Rdm>
13254 As an exception to the rule, this is only used
13255 when optimizing for size since MULS is slow on all
13256 known implementations. We do not even want to use
13257 MULS in cold code, if optimizing for speed, so we
13258 test the global flag here. */
13259 if (!optimize_size)
13261 /* else fall through. */
13265 /* ANDS <Rdn>,<Rm> */
13266 if (rtx_equal_p (dst, op0)
13267 && low_register_operand (op1, SImode))
13269 else if (rtx_equal_p (dst, op1)
13270 && low_register_operand (op0, SImode))
13271 action = SWAP_CONV;
13277 /* ASRS <Rdn>,<Rm> */
13278 /* LSRS <Rdn>,<Rm> */
13279 /* LSLS <Rdn>,<Rm> */
13280 if (rtx_equal_p (dst, op0)
13281 && low_register_operand (op1, SImode))
13283 /* ASRS <Rd>,<Rm>,#<imm5> */
13284 /* LSRS <Rd>,<Rm>,#<imm5> */
13285 /* LSLS <Rd>,<Rm>,#<imm5> */
13286 else if (low_register_operand (op0, SImode)
13287 && CONST_INT_P (op1)
13288 && IN_RANGE (INTVAL (op1), 0, 31))
13293 /* RORS <Rdn>,<Rm> */
13294 if (rtx_equal_p (dst, op0)
13295 && low_register_operand (op1, SImode))
13301 /* MVNS <Rd>,<Rm> */
13302 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13303 if (low_register_operand (op0, SImode))
13308 /* MOVS <Rd>,#<imm8> */
13309 if (CONST_INT_P (src)
13310 && IN_RANGE (INTVAL (src), 0, 255))
13315 /* MOVS and MOV<c> with registers have different
13316 encodings, so are not relevant here. */
13324 if (action != SKIP)
13326 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13327 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13330 if (action == SWAP_CONV)
13332 src = copy_rtx (src);
13333 XEXP (src, 0) = op1;
13334 XEXP (src, 1) = op0;
13335 pat = gen_rtx_SET (VOIDmode, dst, src);
13336 vec = gen_rtvec (2, pat, clobber);
13338 else /* action == CONV */
13339 vec = gen_rtvec (2, pat, clobber);
13341 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13342 INSN_CODE (insn) = -1;
13346 if (NONDEBUG_INSN_P (insn))
13347 df_simulate_one_insn_backwards (bb, insn, &live);
13351 CLEAR_REG_SET (&live);
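/* For example (illustrative, not from the original source): with the
   flags dead, the 32-bit encoding "add.w r0, r0, #5" can be rewritten
   as the flag-setting "adds r0, r0, #5", which fits in 16 bits; the
   CC_REGNUM clobber added to the PARALLEL above records that the
   flags are now written.  */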
13354 /* GCC puts the pool in the wrong place for ARM, since we can only
13355 load addresses a limited distance around the pc. We do some
13356 special munging to move the constant pool values to the correct
13357 point in the code. */
13362 HOST_WIDE_INT address = 0;
13368 minipool_fix_head = minipool_fix_tail = NULL;
13370 /* The first insn must always be a note, or the code below won't
13371 scan it properly. */
13372 insn = get_insns ();
13373 gcc_assert (GET_CODE (insn) == NOTE);
13376 /* Scan all the insns and record the operands that will need fixing. */
13377 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13379 if (GET_CODE (insn) == BARRIER)
13380 push_minipool_barrier (insn, address);
13381 else if (INSN_P (insn))
13385 note_invalid_constants (insn, address, true);
13386 address += get_attr_length (insn);
13388 /* If the insn is a vector jump, add the size of the table
13389 and skip the table. */
13390 if ((table = is_jump_table (insn)) != NULL)
13392 address += get_jump_table_size (table);
13396 else if (LABEL_P (insn))
13397 /* Add the worst-case padding due to alignment. We don't add
13398 the _current_ padding because the minipool insertions
13399 themselves might change it. */
13400 address += get_label_padding (insn);
13403 fix = minipool_fix_head;
13405 /* Now scan the fixups and perform the required changes. */
13410 Mfix * last_added_fix;
13411 Mfix * last_barrier = NULL;
13414 /* Skip any further barriers before the next fix. */
13415 while (fix && GET_CODE (fix->insn) == BARRIER)
13418 /* No more fixes. */
13422 last_added_fix = NULL;
13424 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13426 if (GET_CODE (ftmp->insn) == BARRIER)
13428 if (ftmp->address >= minipool_vector_head->max_address)
13431 last_barrier = ftmp;
13433 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13436 last_added_fix = ftmp; /* Keep track of the last fix added. */
13439 /* If we found a barrier, drop back to that; any fixes that we
13440 could have reached but come after the barrier will now go in
13441 the next mini-pool. */
13442 if (last_barrier != NULL)
13444 /* Reduce the refcount for those fixes that won't go into this
13446 for (fdel = last_barrier->next;
13447 fdel && fdel != ftmp;
13450 fdel->minipool->refcount--;
13451 fdel->minipool = NULL;
13454 ftmp = last_barrier;
13458 /* ftmp is the first fix that we can't fit into this pool and
13459 there are no natural barriers that we could use. Insert a
13460 new barrier in the code somewhere between the previous
13461 fix and this one, and arrange to jump around it. */
13462 HOST_WIDE_INT max_address;
13464 /* The last item on the list of fixes must be a barrier, so
13465 we can never run off the end of the list of fixes without
13466 last_barrier being set. */
13469 max_address = minipool_vector_head->max_address;
13470 /* Check that there isn't another fix that is in range that
13471 we couldn't fit into this pool because the pool was
13472 already too large: we need to put the pool before such an
13473 instruction. The pool itself may come just after the
13474 fix because create_fix_barrier also allows space for a
13475 jump instruction. */
13476 if (ftmp->address < max_address)
13477 max_address = ftmp->address + 1;
13479 last_barrier = create_fix_barrier (last_added_fix, max_address);
13482 assign_minipool_offsets (last_barrier);
13486 if (GET_CODE (ftmp->insn) != BARRIER
13487 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13494 /* Scan over the fixes we have identified for this pool, fixing them
13495 up and adding the constants to the pool itself. */
13496 for (this_fix = fix; this_fix && ftmp != this_fix;
13497 this_fix = this_fix->next)
13498 if (GET_CODE (this_fix->insn) != BARRIER)
13501 = plus_constant (Pmode,
13502 gen_rtx_LABEL_REF (VOIDmode,
13503 minipool_vector_label),
13504 this_fix->minipool->offset);
13505 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13508 dump_minipool (last_barrier->insn);
13512 /* From now on we must synthesize any constants that we can't handle
13513 directly. This can happen if the RTL gets split during final
13514 instruction generation. */
13515 after_arm_reorg = 1;
13517 /* Free the minipool memory. */
13518 obstack_free (&minipool_obstack, minipool_startobj);
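/* For example (illustrative; label names are hypothetical): after this
   pass a constant that cannot be encoded inline is loaded pc-relative
   from a nearby minipool, with a branch inserted around the pool when
   no natural barrier exists:

       ldr     r0, .LP0
       b       .Lover
   .LP0:
       .word   0x12345678
   .Lover:
       ...                                                            */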
13521 /* Routines to output assembly language. */
13523 /* If the rtx is the correct value then return the string of the number.
13524 In this way we can ensure that valid double constants are generated even
13525 when cross-compiling. */
13527 fp_immediate_constant (rtx x)
13531 if (!fp_consts_inited)
13534 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13536 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
13540 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13541 static const char *
13542 fp_const_from_val (REAL_VALUE_TYPE *r)
13544 if (!fp_consts_inited)
13547 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
13551 /* OPERANDS[0] is the entire list of insns that constitute pop,
13552 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
13553 is in the list, UPDATE is true iff the list contains explicit
13554 update of base register. */
13556 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
13562 const char *conditional;
13563 int num_saves = XVECLEN (operands[0], 0);
13564 unsigned int regno;
13565 unsigned int regno_base = REGNO (operands[1]);
13568 offset += update ? 1 : 0;
13569 offset += return_pc ? 1 : 0;
13571 /* Is the base register in the list? */
13572 for (i = offset; i < num_saves; i++)
13574 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
13575 /* If SP is in the list, then the base register must be SP. */
13576 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
13577 /* If base register is in the list, there must be no explicit update. */
13578 if (regno == regno_base)
13579 gcc_assert (!update);
13582 conditional = reverse ? "%?%D0" : "%?%d0";
13583 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
13585 /* Output pop (not stmfd) because it has a shorter encoding. */
13586 gcc_assert (update);
13587 sprintf (pattern, "pop%s\t{", conditional);
13591 /* Output ldmfd when the base register is SP, otherwise output ldmia.
13592 It's just a convention; their semantics are identical.
13593 if (regno_base == SP_REGNUM)
13594 sprintf (pattern, "ldm%sfd\t", conditional);
13595 else if (TARGET_UNIFIED_ASM)
13596 sprintf (pattern, "ldmia%s\t", conditional);
13598 sprintf (pattern, "ldm%sia\t", conditional);
13600 strcat (pattern, reg_names[regno_base]);
13602 strcat (pattern, "!, {");
13604 strcat (pattern, ", {");
13607 /* Output the first destination register. */
13609 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
13611 /* Output the rest of the destination registers. */
13612 for (i = offset + 1; i < num_saves; i++)
13614 strcat (pattern, ", ");
13616 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
13619 strcat (pattern, "}");
13621 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
13622 strcat (pattern, "^");
13624 output_asm_insn (pattern, &cond);
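/* E.g. (illustrative): restoring {r4, r5, pc} with SP as the base
   register and writeback yields "pop {r4, r5, pc}" under unified
   assembly, or "ldmfd sp!, {r4, r5, pc}" otherwise; an interrupt
   return appends "^" so that the SPSR is restored as well.  */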
13628 /* Output the assembly for a store multiple. */
13631 vfp_output_fstmd (rtx * operands)
13638 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13639 p = strlen (pattern);
13641 gcc_assert (GET_CODE (operands[1]) == REG);
13643 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13644 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13646 p += sprintf (&pattern[p], ", d%d", base + i);
13648 strcpy (&pattern[p], "}");
13650 output_asm_insn (pattern, operands);
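/* E.g. (illustrative): pushing the three register pairs starting at
   d8 emits "fstmfdd sp!, {d8, d9, d10}".  */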
13655 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13656 number of bytes pushed. */
13659 vfp_emit_fstmd (int base_reg, int count)
13666 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
13667 register pairs are stored by a store multiple insn. We avoid this
13668 by pushing an extra pair. */
13669 if (count == 2 && !arm_arch6)
13671 if (base_reg == LAST_VFP_REGNUM - 3)
13676 /* FSTMD may not store more than 16 doubleword registers at once. Split
13677 larger stores into multiple parts (up to a maximum of two, in
13682 /* NOTE: base_reg is an internal register number, so each D register
13684 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13685 saved += vfp_emit_fstmd (base_reg, 16);
13689 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13690 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13692 reg = gen_rtx_REG (DFmode, base_reg);
13695 XVECEXP (par, 0, 0)
13696 = gen_rtx_SET (VOIDmode,
13699 gen_rtx_PRE_MODIFY (Pmode,
13702 (Pmode, stack_pointer_rtx,
13705 gen_rtx_UNSPEC (BLKmode,
13706 gen_rtvec (1, reg),
13707 UNSPEC_PUSH_MULT));
13709 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13710 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
13711 RTX_FRAME_RELATED_P (tmp) = 1;
13712 XVECEXP (dwarf, 0, 0) = tmp;
13714 tmp = gen_rtx_SET (VOIDmode,
13715 gen_frame_mem (DFmode, stack_pointer_rtx),
13717 RTX_FRAME_RELATED_P (tmp) = 1;
13718 XVECEXP (dwarf, 0, 1) = tmp;
13720 for (i = 1; i < count; i++)
13722 reg = gen_rtx_REG (DFmode, base_reg);
13724 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13726 tmp = gen_rtx_SET (VOIDmode,
13727 gen_frame_mem (DFmode,
13728 plus_constant (Pmode,
13732 RTX_FRAME_RELATED_P (tmp) = 1;
13733 XVECEXP (dwarf, 0, i + 1) = tmp;
13736 par = emit_insn (par);
13737 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13738 RTX_FRAME_RELATED_P (par) = 1;
13743 /* Emit a call instruction with pattern PAT. ADDR is the address of
13744 the call target. */
13747 arm_emit_call_insn (rtx pat, rtx addr)
13751 insn = emit_call_insn (pat);
13753 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13754 If the call might use such an entry, add a use of the PIC register
13755 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13756 if (TARGET_VXWORKS_RTP
13758 && GET_CODE (addr) == SYMBOL_REF
13759 && (SYMBOL_REF_DECL (addr)
13760 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13761 : !SYMBOL_REF_LOCAL_P (addr)))
13763 require_pic_register ();
13764 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13768 /* Output a 'call' insn. */
13770 output_call (rtx *operands)
13772 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13774 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13775 if (REGNO (operands[0]) == LR_REGNUM)
13777 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13778 output_asm_insn ("mov%?\t%0, %|lr", operands);
13781 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13783 if (TARGET_INTERWORK || arm_arch4t)
13784 output_asm_insn ("bx%?\t%0", operands);
13786 output_asm_insn ("mov%?\t%|pc, %0", operands);
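/* The emitted sequence is, for example (illustrative):

       mov     lr, pc
       bx      r0              @ or "mov pc, r0" without interworking

   i.e. the return address is placed in LR before jumping through the
   register, since BL cannot take a register operand here.  */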
13791 /* Output a 'call' insn that is a reference in memory. This is
13792 disabled for ARMv5 and later, where we prefer a blx instead, because
13793 otherwise there's a significant performance overhead.
13795 output_call_mem (rtx *operands)
13797 gcc_assert (!arm_arch5);
13798 if (TARGET_INTERWORK)
13800 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13801 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13802 output_asm_insn ("bx%?\t%|ip", operands);
13804 else if (regno_use_in (LR_REGNUM, operands[0]))
13806 /* LR is used in the memory address. We load the address in the
13807 first instruction. It's safe to use IP as the target of the
13808 load since the call will kill it anyway. */
13809 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13810 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13812 output_asm_insn ("bx%?\t%|ip", operands);
13814 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13818 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13819 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13826 /* Output a move from arm registers to arm registers of a long double
13827 OPERANDS[0] is the destination.
13828 OPERANDS[1] is the source. */
13830 output_mov_long_double_arm_from_arm (rtx *operands)
13832 /* We have to be careful here because the two might overlap. */
13833 int dest_start = REGNO (operands[0]);
13834 int src_start = REGNO (operands[1]);
13838 if (dest_start < src_start)
13840 for (i = 0; i < 3; i++)
13842 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13843 ops[1] = gen_rtx_REG (SImode, src_start + i);
13844 output_asm_insn ("mov%?\t%0, %1", ops);
13849 for (i = 2; i >= 0; i--)
13851 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13852 ops[1] = gen_rtx_REG (SImode, src_start + i);
13853 output_asm_insn ("mov%?\t%0, %1", ops);
13861 arm_emit_movpair (rtx dest, rtx src)
13863 /* If the src is an immediate, simplify it. */
13864 if (CONST_INT_P (src))
13866 HOST_WIDE_INT val = INTVAL (src);
13867 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13868 if ((val >> 16) & 0x0000ffff)
13869 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13871 GEN_INT ((val >> 16) & 0x0000ffff));
13874 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13875 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
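/* For example (illustrative): with SRC == 0x12345678 this expands to
   a MOVW/MOVT pair,

       movw    r0, #0x5678    @ set the low half
       movt    r0, #0x1234    @ the zero_extract writes the top half

   and the MOVT is omitted when the upper 16 bits are zero.  */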
13878 /* Output a move between double words. It must be REG<-MEM
13881 output_move_double (rtx *operands, bool emit, int *count)
13883 enum rtx_code code0 = GET_CODE (operands[0]);
13884 enum rtx_code code1 = GET_CODE (operands[1]);
13889 /* The only case when this might happen is when
13890 you are looking at the length of a DImode instruction
13891 that has an invalid constant in it. */
13892 if (code0 == REG && code1 != MEM)
13894 gcc_assert (!emit);
13901 unsigned int reg0 = REGNO (operands[0]);
13903 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13905 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13907 switch (GET_CODE (XEXP (operands[1], 0)))
13914 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
13915 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13917 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13922 gcc_assert (TARGET_LDRD);
13924 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13931 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13933 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13941 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13943 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13948 gcc_assert (TARGET_LDRD);
13950 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13955 /* Autoincrement addressing modes should never have overlapping
13956 base and destination registers, and overlapping index registers
13957 are already prohibited, so this doesn't need to worry about
13959 otherops[0] = operands[0];
13960 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13961 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13963 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13965 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13967 /* Registers overlap so split out the increment. */
13970 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13971 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13978 /* Use a single insn if we can.
13979 FIXME: IWMMXT allows offsets larger than ldrd can
13980 handle; fix these up with a pair of ldr. */
13982 || GET_CODE (otherops[2]) != CONST_INT
13983 || (INTVAL (otherops[2]) > -256
13984 && INTVAL (otherops[2]) < 256))
13987 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13993 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13994 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14004 /* Use a single insn if we can.
14005 FIXME: IWMMXT allows offsets larger than ldrd can handle;
14006 fix these up with a pair of ldr. */
14008 || GET_CODE (otherops[2]) != CONST_INT
14009 || (INTVAL (otherops[2]) > -256
14010 && INTVAL (otherops[2]) < 256))
14013 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14019 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14020 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14030 /* We might be able to use ldrd %0, %1 here. However the range is
14031 different to ldr/adr, and it is broken on some ARMv7-M
14032 implementations. */
14033 /* Use the second register of the pair to avoid problematic
14035 otherops[1] = operands[1];
14037 output_asm_insn ("adr%?\t%0, %1", otherops);
14038 operands[1] = otherops[0];
14042 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14044 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14051 /* ??? This needs checking for thumb2. */
14053 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14054 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14056 otherops[0] = operands[0];
14057 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14058 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14060 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14062 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14064 switch ((int) INTVAL (otherops[2]))
14068 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14074 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14080 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14084 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
14085 operands[1] = otherops[0];
14087 && (GET_CODE (otherops[2]) == REG
14089 || (GET_CODE (otherops[2]) == CONST_INT
14090 && INTVAL (otherops[2]) > -256
14091 && INTVAL (otherops[2]) < 256)))
14093 if (reg_overlap_mentioned_p (operands[0],
14097 /* Swap base and index registers over to
14098 avoid a conflict. */
14100 otherops[1] = otherops[2];
14103 /* If both registers conflict, it will usually
14104 have been fixed by a splitter. */
14105 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14106 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14110 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14111 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14118 otherops[0] = operands[0];
14120 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14125 if (GET_CODE (otherops[2]) == CONST_INT)
14129 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14130 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14132 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14138 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14144 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14151 return "ldr%(d%)\t%0, [%1]";
14153 return "ldm%(ia%)\t%1, %M0";
14157 otherops[1] = adjust_address (operands[1], SImode, 4);
14158 /* Take care of overlapping base/data reg. */
14159 if (reg_mentioned_p (operands[0], operands[1]))
14163 output_asm_insn ("ldr%?\t%0, %1", otherops);
14164 output_asm_insn ("ldr%?\t%0, %1", operands);
14174 output_asm_insn ("ldr%?\t%0, %1", operands);
14175 output_asm_insn ("ldr%?\t%0, %1", otherops);
14185 /* Constraints should ensure this. */
14186 gcc_assert (code0 == MEM && code1 == REG);
14187 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14189 switch (GET_CODE (XEXP (operands[0], 0)))
14195 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14197 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14202 gcc_assert (TARGET_LDRD);
14204 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14211 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14213 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14221 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14223 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14228 gcc_assert (TARGET_LDRD);
14230 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14235 otherops[0] = operands[1];
14236 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14237 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14239 /* IWMMXT allows offsets larger than ldrd can handle;
14240 fix these up with a pair of ldr. */
14242 && GET_CODE (otherops[2]) == CONST_INT
14243 && (INTVAL (otherops[2]) <= -256
14244 || INTVAL (otherops[2]) >= 256))
14246 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14250 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14251 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14260 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14261 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14267 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14270 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14275 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14280 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14281 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14283 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14287 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14294 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14301 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14306 && (GET_CODE (otherops[2]) == REG
14308 || (GET_CODE (otherops[2]) == CONST_INT
14309 && INTVAL (otherops[2]) > -256
14310 && INTVAL (otherops[2]) < 256)))
14312 otherops[0] = operands[1];
14313 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14315 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14321 otherops[0] = adjust_address (operands[0], SImode, 4);
14322 otherops[1] = operands[1];
14325 output_asm_insn ("str%?\t%1, %0", operands);
14326 output_asm_insn ("str%?\t%H1, %0", otherops);
14336 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14337 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14340 output_move_quad (rtx *operands)
14342 if (REG_P (operands[0]))
14344 /* Load, or reg->reg move. */
14346 if (MEM_P (operands[1]))
14348 switch (GET_CODE (XEXP (operands[1], 0)))
14351 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14356 output_asm_insn ("adr%?\t%0, %1", operands);
14357 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14361 gcc_unreachable ();
14369 gcc_assert (REG_P (operands[1]));
14371 dest = REGNO (operands[0]);
14372 src = REGNO (operands[1]);
14374 /* This seems pretty dumb, but hopefully GCC won't try to do it
14377 for (i = 0; i < 4; i++)
14379 ops[0] = gen_rtx_REG (SImode, dest + i);
14380 ops[1] = gen_rtx_REG (SImode, src + i);
14381 output_asm_insn ("mov%?\t%0, %1", ops);
14384 for (i = 3; i >= 0; i--)
14386 ops[0] = gen_rtx_REG (SImode, dest + i);
14387 ops[1] = gen_rtx_REG (SImode, src + i);
14388 output_asm_insn ("mov%?\t%0, %1", ops);
14394 gcc_assert (MEM_P (operands[0]));
14395 gcc_assert (REG_P (operands[1]));
14396 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14398 switch (GET_CODE (XEXP (operands[0], 0)))
14401 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14405 gcc_unreachable ();
14412 /* Output a VFP load or store instruction. */
14415 output_move_vfp (rtx *operands)
14417 rtx reg, mem, addr, ops[2];
14418 int load = REG_P (operands[0]);
14419 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14420 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14423 enum machine_mode mode;
14425 reg = operands[!load];
14426 mem = operands[load];
14428 mode = GET_MODE (reg);
14430 gcc_assert (REG_P (reg));
14431 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14432 gcc_assert (mode == SFmode
14436 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14437 gcc_assert (MEM_P (mem));
14439 addr = XEXP (mem, 0);
14441 switch (GET_CODE (addr))
14444 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14445 ops[0] = XEXP (addr, 0);
14450 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14451 ops[0] = XEXP (addr, 0);
14456 templ = "f%s%c%%?\t%%%s0, %%1%s";
14462 sprintf (buff, templ,
14463 load ? "ld" : "st",
14466 integer_p ? "\t%@ int" : "");
14467 output_asm_insn (buff, ops);
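/* E.g. (illustrative): a DFmode load from [r0, #8] into d8 comes out
   as "fldd d8, [r0, #8]", while a PRE_DEC store of d8 through r0
   becomes "fstmdbd r0!, {d8}".  */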
14472 /* Output a Neon quad-word load or store, or a load or store for
14473 larger structure modes.
14475 WARNING: The ordering of elements is weird in big-endian mode,
14476 because we use VSTM, as required by the EABI. GCC RTL defines
14477 element ordering based on in-memory order. This can differ
14478 from the architectural ordering of elements within a NEON register.
14479 The intrinsics defined in arm_neon.h use the NEON register element
14480 ordering, not the GCC RTL element ordering.
14482 For example, the in-memory ordering of a big-endian quadword
14483 vector with 16-bit elements when stored from register pair {d0,d1}
14484 will be (lowest address first, d0[N] is NEON register element N):
14486 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14488 When necessary, quadword registers (dN, dN+1) are moved to ARM
14489 registers from rN in the order:
14491 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14493 So that STM/LDM can be used on vectors in ARM registers, and the
14494 same memory layout will result as if VSTM/VLDM were used. */
14497 output_move_neon (rtx *operands)
14499 rtx reg, mem, addr, ops[2];
14500 int regno, load = REG_P (operands[0]);
14503 enum machine_mode mode;
14505 reg = operands[!load];
14506 mem = operands[load];
14508 mode = GET_MODE (reg);
14510 gcc_assert (REG_P (reg));
14511 regno = REGNO (reg);
14512 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14513 || NEON_REGNO_OK_FOR_QUAD (regno));
14514 gcc_assert (VALID_NEON_DREG_MODE (mode)
14515 || VALID_NEON_QREG_MODE (mode)
14516 || VALID_NEON_STRUCT_MODE (mode));
14517 gcc_assert (MEM_P (mem));
14519 addr = XEXP (mem, 0);
14521 /* Strip off const from addresses like (const (plus (...))). */
14522 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14523 addr = XEXP (addr, 0);
14525 switch (GET_CODE (addr))
14528 templ = "v%smia%%?\t%%0!, %%h1";
14529 ops[0] = XEXP (addr, 0);
14534 /* FIXME: Should we be using vld1/vst1 here in BE mode? */
14535 templ = "v%smdb%%?\t%%0!, %%h1";
14536 ops[0] = XEXP (addr, 0);
14541 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14542 gcc_unreachable ();
14547 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14550 for (i = 0; i < nregs; i++)
14552 /* We're only using DImode here because it's a convenient size. */
14553 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14554 ops[1] = adjust_address (mem, DImode, 8 * i);
14555 if (reg_overlap_mentioned_p (ops[0], mem))
14557 gcc_assert (overlap == -1);
14562 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14563 output_asm_insn (buff, ops);
14568 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14569 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14570 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14571 output_asm_insn (buff, ops);
14578 templ = "v%smia%%?\t%%m0, %%h1";
14583 sprintf (buff, templ, load ? "ld" : "st");
14584 output_asm_insn (buff, ops);
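/* E.g. (illustrative): loading the quad register q0 (pair d0-d1) from
   the address in r0 emits "vldmia r0, {d0-d1}"; with POST_INC the
   writeback form "vldmia r0!, {d0-d1}" is used instead.  */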
14589 /* Compute and return the length of neon_mov<mode>, where <mode> is
14590 one of VSTRUCT modes: EI, OI, CI or XI. */
14592 arm_attr_length_move_neon (rtx insn)
14594 rtx reg, mem, addr;
14596 enum machine_mode mode;
14598 extract_insn_cached (insn);
14600 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14602 mode = GET_MODE (recog_data.operand[0]);
14613 gcc_unreachable ();
14617 load = REG_P (recog_data.operand[0]);
14618 reg = recog_data.operand[!load];
14619 mem = recog_data.operand[load];
14621 gcc_assert (MEM_P (mem));
14623 mode = GET_MODE (reg);
14624 addr = XEXP (mem, 0);
14626 /* Strip off const from addresses like (const (plus (...))). */
14627 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14628 addr = XEXP (addr, 0);
14630 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14632 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14639 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14643 arm_address_offset_is_imm (rtx insn)
14647 extract_insn_cached (insn);
14649 if (REG_P (recog_data.operand[0]))
14652 mem = recog_data.operand[0];
14654 gcc_assert (MEM_P (mem));
14656 addr = XEXP (mem, 0);
14658 if (GET_CODE (addr) == REG
14659 || (GET_CODE (addr) == PLUS
14660 && GET_CODE (XEXP (addr, 0)) == REG
14661 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14667 /* Output an ADD r, s, #n where n may be too big for one instruction.
14668 If adding zero to one register, output nothing. */
14670 output_add_immediate (rtx *operands)
14672 HOST_WIDE_INT n = INTVAL (operands[2]);
14674 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14677 output_multi_immediate (operands,
14678 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14681 output_multi_immediate (operands,
14682 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14689 /* Output a multiple immediate operation.
14690 OPERANDS is the vector of operands referred to in the output patterns.
14691 INSTR1 is the output pattern to use for the first constant.
14692 INSTR2 is the output pattern to use for subsequent constants.
14693 IMMED_OP is the index of the constant slot in OPERANDS.
14694 N is the constant value. */
14695 static const char *
14696 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14697 int immed_op, HOST_WIDE_INT n)
14699 #if HOST_BITS_PER_WIDE_INT > 32
14705 /* Quick and easy output. */
14706 operands[immed_op] = const0_rtx;
14707 output_asm_insn (instr1, operands);
14712 const char * instr = instr1;
14714 /* Note that n is never zero here (which would give no output). */
14715 for (i = 0; i < 32; i += 2)
14719 operands[immed_op] = GEN_INT (n & (255 << i));
14720 output_asm_insn (instr, operands);
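/* For example (illustrative): N = 0x10f00 is emitted as two chunks,
   each an 8-bit value at an even bit position:

       add     r0, r1, #0xf00
       add     r0, r0, #0x10000                                       */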
14730 /* Return the name of a shifter operation. */
14731 static const char *
14732 arm_shift_nmem (enum rtx_code code)
14737 return ARM_LSL_NAME;
14753 /* Return the appropriate ARM instruction for the operation code.
14754 The returned result should not be overwritten. OP is the rtx of the
14755 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14758 arithmetic_instr (rtx op, int shift_first_arg)
14760 switch (GET_CODE (op))
14766 return shift_first_arg ? "rsb" : "sub";
14781 return arm_shift_nmem (GET_CODE (op));
14784 gcc_unreachable ();
14788 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14789 for the operation code. The returned result should not be overwritten.
14790 OP is the rtx code of the shift.
14791 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
14793 static const char *
14794 shift_op (rtx op, HOST_WIDE_INT *amountp)
14797 enum rtx_code code = GET_CODE (op);
14799 switch (GET_CODE (XEXP (op, 1)))
14807 *amountp = INTVAL (XEXP (op, 1));
14811 gcc_unreachable ();
14817 gcc_assert (*amountp != -1);
14818 *amountp = 32 - *amountp;
14821 /* Fall through. */
14827 mnem = arm_shift_nmem (code);
14831 /* We never have to worry about the amount being other than a
14832 power of 2, since this case can never be reloaded from a reg. */
14833 gcc_assert (*amountp != -1);
14834 *amountp = int_log2 (*amountp);
14835 return ARM_LSL_NAME;
14838 gcc_unreachable ();
14841 if (*amountp != -1)
14843 /* This is not 100% correct, but follows from the desire to merge
14844 multiplication by a power of 2 with the recognizer for a
14845 shift. >=32 is not a valid shift for "lsl", so we must try and
14846 output a shift that produces the correct arithmetical result.
14847 Using lsr #32 is identical except for the fact that the carry bit
14848 is not set correctly if we set the flags; but we never use the
14849 carry bit from such an operation, so we can ignore that. */
14850 if (code == ROTATERT)
14851 /* Rotate is just modulo 32. */
14853 else if (*amountp != (*amountp & 31))
14855 if (code == ASHIFT)
14860 /* Shifts of 0 are no-ops. */
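/* For example (illustrative): (mult x 8) reaches here with
   *AMOUNTP == 8 and is output as "lsl #3", while an lsl by 32 or more
   is emitted as "lsr #32" -- both produce zero, and the carry
   difference is never observed, as the comment above explains.  */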
14868 /* Obtain the shift from the POWER of two. */
14870 static HOST_WIDE_INT
14871 int_log2 (HOST_WIDE_INT power)
14873 HOST_WIDE_INT shift = 0;
14875 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14877 gcc_assert (shift <= 31);
14884 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14885 because /bin/as is horribly restrictive. The judgement about
14886 whether or not each character is 'printable' (and can be output as
14887 is) or not (and must be printed with an octal escape) must be made
14888 with reference to the *host* character set -- the situation is
14889 similar to that discussed in the comments above pp_c_char in
14890 c-pretty-print.c. */
14892 #define MAX_ASCII_LEN 51
14895 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14898 int len_so_far = 0;
14900 fputs ("\t.ascii\t\"", stream);
14902 for (i = 0; i < len; i++)
14906 if (len_so_far >= MAX_ASCII_LEN)
14908 fputs ("\"\n\t.ascii\t\"", stream);
14914 if (c == '\\' || c == '\"')
14916 putc ('\\', stream);
14924 fprintf (stream, "\\%03o", c);
14929 fputs ("\"\n", stream);
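/* E.g. (illustrative): the string "a\"b" is emitted as

       .ascii  "a\"b"

   with the quote escaped, and any non-printable byte is written as a
   three-digit octal escape such as \012.  */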
14932 /* Compute the register save mask for registers 0 through 12
14933 inclusive. This code is used by arm_compute_save_reg_mask. */
14935 static unsigned long
14936 arm_compute_save_reg0_reg12_mask (void)
14938 unsigned long func_type = arm_current_func_type ();
14939 unsigned long save_reg_mask = 0;
14942 if (IS_INTERRUPT (func_type))
14944 unsigned int max_reg;
14945 /* Interrupt functions must not corrupt any registers,
14946 even call clobbered ones. If this is a leaf function
14947 we can just examine the registers used by the RTL, but
14948 otherwise we have to assume that whatever function is
14949 called might clobber anything, and so we have to save
14950 all the call-clobbered registers as well. */
14951 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
14952 /* FIQ handlers have registers r8 - r12 banked, so
14953 we only need to check r0 - r7. Normal ISRs only
14954 bank r14 and r15, so we must check up to r12.
14955 r13 is the stack pointer which is always preserved,
14956 so we do not need to consider it here. */
14961 for (reg = 0; reg <= max_reg; reg++)
14962 if (df_regs_ever_live_p (reg)
14963 || (! crtl->is_leaf && call_used_regs[reg]))
14964 save_reg_mask |= (1 << reg);
14966 /* Also save the pic base register if necessary. */
14968 && !TARGET_SINGLE_PIC_BASE
14969 && arm_pic_register != INVALID_REGNUM
14970 && crtl->uses_pic_offset_table)
14971 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14973 else if (IS_VOLATILE (func_type))
14975 /* For noreturn functions we historically omitted register saves
14976 altogether. However, this really messes up debugging. As a
14977 compromise, save just the frame pointers. Combined with the link
14978 register saved elsewhere this should be sufficient to get
14980 if (frame_pointer_needed)
14981 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14982 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
14983 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14984 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
14985 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
14989 /* In the normal case we only need to save those registers
14990 which are call saved and which are used by this function. */
14991 for (reg = 0; reg <= 11; reg++)
14992 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14993 save_reg_mask |= (1 << reg);
14995 /* Handle the frame pointer as a special case. */
14996 if (frame_pointer_needed)
14997 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14999 /* If we aren't loading the PIC register,
15000 don't stack it even though it may be live. */
15002 && !TARGET_SINGLE_PIC_BASE
15003 && arm_pic_register != INVALID_REGNUM
15004 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15005 || crtl->uses_pic_offset_table))
15006 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15008 /* The prologue will copy SP into R0, so save it. */
15009 if (IS_STACKALIGN (func_type))
15010 save_reg_mask |= 1;
15013 /* Save registers so the exception handler can modify them. */
15014 if (crtl->calls_eh_return)
15020 reg = EH_RETURN_DATA_REGNO (i);
15021 if (reg == INVALID_REGNUM)
15023 save_reg_mask |= 1 << reg;
15027 return save_reg_mask;
15031 /* Compute the number of bytes used to store the static chain register on the
15032 stack, above the stack frame. We need to know this accurately to get the
15033 alignment of the rest of the stack frame correct. */
15035 static int arm_compute_static_chain_stack_bytes (void)
15037 unsigned long func_type = arm_current_func_type ();
15038 int static_chain_stack_bytes = 0;
15040 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15041 && IS_NESTED (func_type)
15042 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15043 static_chain_stack_bytes = 4;
15045 return static_chain_stack_bytes;
15049 /* Compute a bit mask of which registers need to be
15050 saved on the stack for the current function.
15051 This is used by arm_get_frame_offsets, which may add extra registers. */
15053 static unsigned long
15054 arm_compute_save_reg_mask (void)
15056 unsigned int save_reg_mask = 0;
15057 unsigned long func_type = arm_current_func_type ();
15060 if (IS_NAKED (func_type))
15061 /* This should never really happen. */
15064 /* If we are creating a stack frame, then we must save the frame pointer,
15065 IP (which will hold the old stack pointer), LR and the PC. */
15066 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15068 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15071 | (1 << PC_REGNUM);
15073 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15075 /* Decide if we need to save the link register.
15076 Interrupt routines have their own banked link register,
15077 so they never need to save it.
15078 Otherwise if we do not use the link register we do not need to save
15079 it. If we are pushing other registers onto the stack however, we
15080 can save an instruction in the epilogue by pushing the link register
15081 now and then popping it back into the PC. This incurs extra memory
15082 accesses though, so we only do it when optimizing for size, and only
15083 if we know that we will not need a fancy return sequence. */
15084 if (df_regs_ever_live_p (LR_REGNUM)
15087 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15088 && !crtl->calls_eh_return))
15089 save_reg_mask |= 1 << LR_REGNUM;
15091 if (cfun->machine->lr_save_eliminated)
15092 save_reg_mask &= ~ (1 << LR_REGNUM);
15094 if (TARGET_REALLY_IWMMXT
15095 && ((bit_count (save_reg_mask)
15096 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15097 arm_compute_static_chain_stack_bytes())
15100 /* The total number of registers that are going to be pushed
15101 onto the stack is odd. We need to ensure that the stack
15102 is 64-bit aligned before we start to save iWMMXt registers,
15103 and also before we start to create locals. (A local variable
15104 might be a double or long long which we will load/store using
15105 an iWMMXt instruction). Therefore we need to push another
15106 ARM register, so that the stack will be 64-bit aligned. We
15107 try to avoid using the arg registers (r0 - r3) as they might be
15108 used to pass values in a tail call. */
15109 for (reg = 4; reg <= 12; reg++)
15110 if ((save_reg_mask & (1 << reg)) == 0)
15114 save_reg_mask |= (1 << reg);
15117 cfun->machine->sibcall_blocked = 1;
15118 save_reg_mask |= (1 << 3);
15122 /* We may need to push an additional register for use initializing the
15123 PIC base register. */
15124 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15125 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15127 reg = thumb_find_work_register (1 << 4);
15128 if (!call_used_regs[reg])
15129 save_reg_mask |= (1 << reg);
15132 return save_reg_mask;
15136 /* Compute a bit mask of which registers need to be
15137 saved on the stack for the current function. */
15138 static unsigned long
15139 thumb1_compute_save_reg_mask (void)
15141 unsigned long mask;
15145 for (reg = 0; reg < 12; reg ++)
15146 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15150 && !TARGET_SINGLE_PIC_BASE
15151 && arm_pic_register != INVALID_REGNUM
15152 && crtl->uses_pic_offset_table)
15153 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15155 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15156 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15157 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15159 /* LR will also be pushed if any lo regs are pushed. */
15160 if (mask & 0xff || thumb_force_lr_save ())
15161 mask |= (1 << LR_REGNUM);
15163 /* Make sure we have a low work register if we need one.
15164 We will need one if we are going to push a high register,
15165 but we are not currently intending to push a low register. */
15166 if ((mask & 0xff) == 0
15167 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15169 /* Use thumb_find_work_register to choose which register
15170 we will use. If the register is live then we will
15171 have to push it. Use LAST_LO_REGNUM as our fallback
15172 choice for the register to select. */
15173 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15174 /* Make sure the register returned by thumb_find_work_register is
15175 not part of the return value. */
15176 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15177 reg = LAST_LO_REGNUM;
15179 if (! call_used_regs[reg])
15183 /* The 504 below is 8 bytes less than 512 because there are two possible
15184 alignment words. We can't tell here if they will be present or not so we
15185 have to play it safe and assume that they are. */
15186 if ((CALLER_INTERWORKING_SLOT_SIZE +
15187 ROUND_UP_WORD (get_frame_size ()) +
15188 crtl->outgoing_args_size) >= 504)
15190 /* This is the same as the code in thumb1_expand_prologue() which
15191 determines which register to use for stack decrement. */
15192 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15193 if (mask & (1 << reg))
15196 if (reg > LAST_LO_REGNUM)
15198 /* Make sure we have a register available for stack decrement. */
15199 mask |= 1 << LAST_LO_REGNUM;
15207 /* Return the number of bytes required to save VFP registers. */
15209 arm_get_vfp_saved_size (void)
15211 unsigned int regno;
15216 /* Space for saved VFP registers. */
15217 if (TARGET_HARD_FLOAT && TARGET_VFP)
15220 for (regno = FIRST_VFP_REGNUM;
15221 regno < LAST_VFP_REGNUM;
15224 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15225 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15229 /* Workaround ARM10 VFPr1 bug. */
15230 if (count == 2 && !arm_arch6)
15232 saved += count * 8;
15241 if (count == 2 && !arm_arch6)
15243 saved += count * 8;
15250 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15251 everything bar the final return instruction. If simple_return is true,
15252 then do not output the epilogue, because it has already been emitted in RTL. */
15254 output_return_instruction (rtx operand, bool really_return, bool reverse,
15255 bool simple_return)
15257 char conditional[10];
15260 unsigned long live_regs_mask;
15261 unsigned long func_type;
15262 arm_stack_offsets *offsets;
15264 func_type = arm_current_func_type ();
15266 if (IS_NAKED (func_type))
15269 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15271 /* If this function was declared non-returning, and we have
15272 found a tail call, then we have to trust that the called
15273 function won't return. */
15278 /* Otherwise, trap an attempted return by aborting. */
15280 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15282 assemble_external_libcall (ops[1]);
15283 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15289 gcc_assert (!cfun->calls_alloca || really_return);
15291 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15293 cfun->machine->return_used_this_function = 1;
15295 offsets = arm_get_frame_offsets ();
15296 live_regs_mask = offsets->saved_regs_mask;
15298 if (!simple_return && live_regs_mask)
15300 const char * return_reg;
15302 /* If we do not have any special requirements for function exit
15303 (e.g. interworking) then we can load the return address
15304 directly into the PC. Otherwise we must load it into LR. */
15306 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15307 return_reg = reg_names[PC_REGNUM];
15309 return_reg = reg_names[LR_REGNUM];
15311 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15313 /* There are three possible reasons for the IP register
15314 being saved: 1) a stack frame was created, in which case
15315 IP contains the old stack pointer, or 2) an ISR routine
15316 corrupted it, or 3) it was saved to align the stack on
15317 iWMMXt. In case 1, restore IP into SP, otherwise just
15319 if (frame_pointer_needed)
15321 live_regs_mask &= ~ (1 << IP_REGNUM);
15322 live_regs_mask |= (1 << SP_REGNUM);
15325 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15328 /* On some ARM architectures it is faster to use LDR rather than
15329 LDM to load a single register. On other architectures, the
15330 cost is the same. In 26 bit mode, or for exception handlers,
15331 we have to use LDM to load the PC so that the CPSR is also
15333 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15334 if (live_regs_mask == (1U << reg))
15337 if (reg <= LAST_ARM_REGNUM
15338 && (reg != LR_REGNUM
15340 || ! IS_INTERRUPT (func_type)))
15342 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15343 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15350 /* Generate the load multiple instruction to restore the
15351 registers. Note we can get here, even if
15352 frame_pointer_needed is true, but only if sp already
15353 points to the base of the saved core registers. */
15354 if (live_regs_mask & (1 << SP_REGNUM))
15356 unsigned HOST_WIDE_INT stack_adjust;
15358 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15359 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15361 if (stack_adjust && arm_arch5 && TARGET_ARM)
15362 if (TARGET_UNIFIED_ASM)
15363 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15365 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15368 /* If we can't use ldmib (SA110 bug),
15369 then try to pop r3 instead. */
15371 live_regs_mask |= 1 << 3;
15373 if (TARGET_UNIFIED_ASM)
15374 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15376 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15380 if (TARGET_UNIFIED_ASM)
15381 sprintf (instr, "pop%s\t{", conditional);
15383 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15385 p = instr + strlen (instr);
15387 for (reg = 0; reg <= SP_REGNUM; reg++)
15388 if (live_regs_mask & (1 << reg))
15390 int l = strlen (reg_names[reg]);
15396 memcpy (p, ", ", 2);
15400 memcpy (p, "%|", 2);
15401 memcpy (p + 2, reg_names[reg], l);
15405 if (live_regs_mask & (1 << LR_REGNUM))
15407 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15408 /* If returning from an interrupt, restore the CPSR. */
15409 if (IS_INTERRUPT (func_type))
15416 output_asm_insn (instr, & operand);
15418 /* See if we need to generate an extra instruction to
15419 perform the actual function return. */
15421 && func_type != ARM_FT_INTERWORKED
15422 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15424 /* The return has already been handled
15425 by loading the LR into the PC. */
15432 switch ((int) ARM_FUNC_TYPE (func_type))
15436 /* ??? This is wrong for unified assembly syntax. */
15437 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15440 case ARM_FT_INTERWORKED:
15441 sprintf (instr, "bx%s\t%%|lr", conditional);
15444 case ARM_FT_EXCEPTION:
15445 /* ??? This is wrong for unified assembly syntax. */
15446 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15450 /* Use bx if it's available. */
15451 if (arm_arch5 || arm_arch4t)
15452 sprintf (instr, "bx%s\t%%|lr", conditional);
15454 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15458 output_asm_insn (instr, & operand);
15464 /* Write the function name into the code section, directly preceding
15465 the function prologue.
15467 Code similar to the following will be output:
15469 .ascii "arm_poke_function_name", 0
15472 .word 0xff000000 + (t1 - t0)
15473 arm_poke_function_name
15475 stmfd sp!, {fp, ip, lr, pc}
15478 When performing a stack backtrace, code can inspect the value
15479 of 'pc' stored at 'fp' + 0. If the trace function then looks
15480 at location pc - 12 and the top 8 bits are set, then we know
15481 that there is a function name embedded immediately preceding this
15482 location, and that its length is (pc[-3] & ~0xff000000).
15484 We assume that pc is declared as a pointer to an unsigned long.
15486 It is of no benefit to output the function name if we are assembling
15487 a leaf function. These function types will not contain a stack
15488 backtrace structure, therefore it is not possible to determine the
15491 arm_poke_function_name (FILE *stream, const char *name)
15493 unsigned long alignlength;
15494 unsigned long length;
15497 length = strlen (name) + 1;
15498 alignlength = ROUND_UP_WORD (length);
15500 ASM_OUTPUT_ASCII (stream, name, length);
15501 ASM_OUTPUT_ALIGN (stream, 2);
15502 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15503 assemble_aligned_integer (UNITS_PER_WORD, x);
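/* A minimal sketch (assumption: not part of this file; the name is
   hypothetical) of the consumer side described above: given the saved
   'pc' value from a stack frame, recover the embedded name.  */
static const char *
example_find_poked_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];      /* the .word emitted above */

  if ((marker & 0xff000000) != 0xff000000)
    return 0;                         /* no name was poked here */

  /* The padded name immediately precedes the marker word.  */
  return (const char *) pc - 12 - (marker & 0x00ffffff);
}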
15506 /* Place some comments into the assembler stream
15507 describing the current function. */
15509 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15511 unsigned long func_type;
15513 /* ??? Do we want to print some of the below anyway? */
15517 /* Sanity check. */
15518 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15520 func_type = arm_current_func_type ();
15522 switch ((int) ARM_FUNC_TYPE (func_type))
15525 case ARM_FT_NORMAL:
15527 case ARM_FT_INTERWORKED:
15528 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15531 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15534 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15536 case ARM_FT_EXCEPTION:
15537 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15541 if (IS_NAKED (func_type))
15542 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15544 if (IS_VOLATILE (func_type))
15545 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15547 if (IS_NESTED (func_type))
15548 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15549 if (IS_STACKALIGN (func_type))
15550 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15552 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15554 crtl->args.pretend_args_size, frame_size);
15556 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15557 frame_pointer_needed,
15558 cfun->machine->uses_anonymous_args);
15560 if (cfun->machine->lr_save_eliminated)
15561 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15563 if (crtl->calls_eh_return)
15564 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15569 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15570 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15572 arm_stack_offsets *offsets;
15578 /* Emit any call-via-reg trampolines that are needed for v4t support
15579 of call_reg and call_value_reg type insns. */
15580 for (regno = 0; regno < LR_REGNUM; regno++)
15582 rtx label = cfun->machine->call_via[regno];
15586 switch_to_section (function_section (current_function_decl));
15587 targetm.asm_out.internal_label (asm_out_file, "L",
15588 CODE_LABEL_NUMBER (label));
15589 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15593 /* ??? Probably not safe to set this here, since it assumes that a
15594 function will be emitted as assembly immediately after we generate
15595 RTL for it. This does not happen for inline functions. */
15596 cfun->machine->return_used_this_function = 0;
15598 else /* TARGET_32BIT */
15600 /* We need to take into account any stack-frame rounding. */
15601 offsets = arm_get_frame_offsets ();
15603 gcc_assert (!use_return_insn (FALSE, NULL)
15604 || (cfun->machine->return_used_this_function != 0)
15605 || offsets->saved_regs == offsets->outgoing_args
15606 || frame_pointer_needed);
15608 /* Reset the ARM-specific per-function variables. */
15609 after_arm_reorg = 0;
15613 /* Generate and emit an insn that we will recognize as a push_multi.
15614 Unfortunately, since this insn does not reflect very well the actual
15615 semantics of the operation, we need to annotate the insn for the benefit
15616 of DWARF2 frame unwind information. */
15618 emit_multi_reg_push (unsigned long mask)
15621 int num_dwarf_regs;
15625 int dwarf_par_index;
15628 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15629 if (mask & (1 << i))
15632 gcc_assert (num_regs && num_regs <= 16);
15634 /* We don't record the PC in the dwarf frame information. */
15635 num_dwarf_regs = num_regs;
15636 if (mask & (1 << PC_REGNUM))
15639 /* For the body of the insn we are going to generate an UNSPEC in
15640 parallel with several USEs. This allows the insn to be recognized
15641 by the push_multi pattern in the arm.md file.
15643 The body of the insn looks something like this:
15646 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15647 (const_int:SI <num>)))
15648 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15654 For the frame note however, we try to be more explicit and actually
15655 show each register being stored into the stack frame, plus a (single)
15656 decrement of the stack pointer. We do it this way in order to be
15657 friendly to the stack unwinding code, which only wants to see a single
15658 stack decrement per instruction. The RTL we generate for the note looks
15659 something like this:
15662 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15663 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15664 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15665 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15669 FIXME: In an ideal world the PRE_MODIFY would not exist and
15670 instead we'd have a parallel expression detailing all
15671 the stores to the various memory addresses so that debug
15672 information is more up-to-date. Remember however while writing
15673 this to take care of the constraints with the push instruction.
15675 Note also that this has to be taken care of for the VFP registers.
15677 For more see PR43399. */
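/* As an illustration (not from the original sources): for
   MASK == (1 << 4) | (1 << 5) | (1 << LR_REGNUM), the insn emitted is
   equivalent to

       push    {r4, r5, lr}

   while the REG_FRAME_RELATED_EXPR note describes it to the unwinder as

       (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
       (set (mem:SI (reg:SI sp)) (reg:SI r4))
       (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
       (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr)).  */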
15679 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15680 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15681 dwarf_par_index = 1;
15683 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15685 if (mask & (1 << i))
15687 reg = gen_rtx_REG (SImode, i);
15689 XVECEXP (par, 0, 0)
15690 = gen_rtx_SET (VOIDmode,
15693 gen_rtx_PRE_MODIFY (Pmode,
15696 (Pmode, stack_pointer_rtx,
15699 gen_rtx_UNSPEC (BLKmode,
15700 gen_rtvec (1, reg),
15701 UNSPEC_PUSH_MULT));
15703 if (i != PC_REGNUM)
15705 tmp = gen_rtx_SET (VOIDmode,
15706 gen_frame_mem (SImode, stack_pointer_rtx),
15708 RTX_FRAME_RELATED_P (tmp) = 1;
15709 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15717 for (j = 1, i++; j < num_regs; i++)
15719 if (mask & (1 << i))
15721 reg = gen_rtx_REG (SImode, i);
15723 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15725 if (i != PC_REGNUM)
15728 = gen_rtx_SET (VOIDmode,
15731 plus_constant (Pmode, stack_pointer_rtx,
15734 RTX_FRAME_RELATED_P (tmp) = 1;
15735 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15742 par = emit_insn (par);
15744 tmp = gen_rtx_SET (VOIDmode,
15746 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
15747 RTX_FRAME_RELATED_P (tmp) = 1;
15748 XVECEXP (dwarf, 0, 0) = tmp;
15750 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15755 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
15756 SAVED_REGS_MASK shows which registers need to be restored.
15758 Unfortunately, since this insn does not reflect very well the actual
15759 semantics of the operation, we need to annotate the insn for the benefit
15760 of DWARF2 frame unwind information. */
15762 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
15767 rtx dwarf = NULL_RTX;
15773 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) != 0;
15774 offset_adj = return_in_pc ? 1 : 0;
15775 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15776 if (saved_regs_mask & (1 << i))
15777 num_regs++;
15779 gcc_assert (num_regs && num_regs <= 16);
15781 /* If SP is in the register list, then we don't emit an SP update insn. */
15782 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
15784 /* The parallel needs to hold num_regs SETs
15785 and one SET for the stack update. */
15786 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
15791 XVECEXP (par, 0, 0) = tmp;
15796 /* Increment the stack pointer, based on there being
15797 num_regs 4-byte registers to restore. */
15798 tmp = gen_rtx_SET (VOIDmode,
15800 plus_constant (Pmode,
15803 RTX_FRAME_RELATED_P (tmp) = 1;
15804 XVECEXP (par, 0, offset_adj) = tmp;
15807 /* Now restore every reg, which may include PC. */
15808 for (j = 0, i = 0; j < num_regs; i++)
15809 if (saved_regs_mask & (1 << i))
15811 reg = gen_rtx_REG (SImode, i);
15812 tmp = gen_rtx_SET (VOIDmode,
15816 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
15817 RTX_FRAME_RELATED_P (tmp) = 1;
15818 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
15820 /* We also need to maintain a sequence for the DWARF info. Since the
15821 DWARF info should not include the PC, skip it. */
15822 if (i != PC_REGNUM)
15823 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
15829 par = emit_jump_insn (par);
15831 par = emit_insn (par);
15833 REG_NOTES (par) = dwarf;
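/* Illustrative example (assumed values, not from the original code): for
   SAVED_REGS_MASK == (1 << 4) | (1 << 5) | (1 << PC_REGNUM) this emits,
   in effect,

       pop     {r4, r5, pc}

   as a jump insn (the pop doubles as the function return), the stack
   update SET adds 4 * 3 = 12 to SP, and the REG_CFA_RESTORE notes cover
   r4 and r5 only, since the PC is not tracked in the DWARF info.  */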
15836 /* Generate and emit an insn pattern that we will recognize as a pop_multi
15837 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
15839 Unfortunately, since this insn does not reflect very well the actual
15840 semantics of the operation, we need to annotate the insn for the benefit
15841 of DWARF2 frame unwind information. */
15843 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
15847 rtx dwarf = NULL_RTX;
15850 gcc_assert (num_regs && num_regs <= 32);
15852 /* Work around the ARM10 VFPr1 bug. */
15853 if (num_regs == 2 && !arm_arch6)
15855 if (first_reg == 15)
15861 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
15862 there could be up to 32 D-registers to restore.
15863 If there are more than 16 D-registers, make two recursive calls,
15864 each of which emits one pop_multi instruction. */
15867 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
15868 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
15872 /* The parallel needs to hold num_regs SETs
15873 and one SET for the stack update. */
15874 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
15876 /* Increment the stack pointer, based on there being
15877 num_regs 8-byte registers to restore. */
15878 tmp = gen_rtx_SET (VOIDmode,
15880 plus_constant (Pmode, base_reg, 8 * num_regs));
15881 RTX_FRAME_RELATED_P (tmp) = 1;
15882 XVECEXP (par, 0, 0) = tmp;
15884 /* Now show every reg that will be restored, using a SET for each. */
15885 for (j = 0, i = first_reg; j < num_regs; i += 2)
15887 reg = gen_rtx_REG (DFmode, i);
15889 tmp = gen_rtx_SET (VOIDmode,
15893 plus_constant (Pmode, base_reg, 8 * j)));
15894 RTX_FRAME_RELATED_P (tmp) = 1;
15895 XVECEXP (par, 0, j + 1) = tmp;
15897 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
15902 par = emit_insn (par);
15903 REG_NOTES (par) = dwarf;
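/* For example (illustrative only): restoring d8-d15 relative to a
   BASE_REG of SP would emit the equivalent of

       vldmia  sp!, {d8-d15}

   with the first SET in the parallel advancing the base by
   8 * 8 = 64 bytes and one REG_CFA_RESTORE note per D register.  */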
15906 /* Calculate the size of the return value that is passed in registers. */
15908 arm_size_return_regs (void)
15910 enum machine_mode mode;
15912 if (crtl->return_rtx != 0)
15913 mode = GET_MODE (crtl->return_rtx);
15915 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15917 return GET_MODE_SIZE (mode);
15920 /* Return true if the current function needs to save/restore LR. */
15922 thumb_force_lr_save (void)
15924 return !cfun->machine->lr_save_eliminated
15925 && (!leaf_function_p ()
15926 || thumb_far_jump_used_p ()
15927 || df_regs_ever_live_p (LR_REGNUM));
15931 /* Return true if r3 is used by any of the tail call insns in the
15932 current function. */
15934 any_sibcall_uses_r3 (void)
15939 if (!crtl->tail_call_emit)
15941 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15942 if (e->flags & EDGE_SIBCALL)
15944 rtx call = BB_END (e->src);
15945 if (!CALL_P (call))
15946 call = prev_nonnote_nondebug_insn (call);
15947 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15948 if (find_regno_fusage (call, USE, 3))
15955 /* Compute the distance from register FROM to register TO.
15956 These can be the arg pointer (26), the soft frame pointer (25),
15957 the stack pointer (13) or the hard frame pointer (11).
15958 In thumb mode r7 is used as the soft frame pointer, if needed.
15959 Typical stack layout looks like this:
15961 old stack pointer -> |    |
15962                       ----
15963                      |    | \
15964                      |    |   saved arguments for
15965                      |    |   vararg functions
15966                      |    | /
15967                       --
15968 hard FP & arg pointer -> |    | \
15969                      |    |   stack
15970                      |    |   frame
15971                      |    | /
15972                       --
15973                      |    | \
15974                      |    |   call saved
15975                      |    |   registers
15976 soft frame pointer -> |    | /
15977                       --
15978                      |    | \
15979                      |    |   local
15980                      |    |   variables
15981 locals base pointer -> |    | /
15982                       --
15983                      |    | \
15984                      |    |   outgoing
15985                      |    |   arguments
15986 current stack pointer -> |    | /
15987                       --
15989 For a given function some or all of these stack components
15990 may not be needed, giving rise to the possibility of
15991 eliminating some of the registers.
15993 The values returned by this function must reflect the behavior
15994 of arm_expand_prologue() and arm_compute_save_reg_mask().
15996 The sign of the number returned reflects the direction of stack
15997 growth, so the values are positive for all eliminations except
15998 from the soft frame pointer to the hard frame pointer.
16000 SFP may point just inside the local variables block to ensure correct
16001 alignment. */
16004 /* Calculate stack offsets. These are used to calculate register elimination
16005 offsets and in prologue/epilogue code. Also calculates which registers
16006 should be saved. */
16008 static arm_stack_offsets *
16009 arm_get_frame_offsets (void)
16011 struct arm_stack_offsets *offsets;
16012 unsigned long func_type;
16016 HOST_WIDE_INT frame_size;
16019 offsets = &cfun->machine->stack_offsets;
16021 /* We need to know if we are a leaf function. Unfortunately, it
16022 is possible to be called after start_sequence has been called,
16023 which causes get_insns to return the insns for the sequence,
16024 not the function, which will cause leaf_function_p to return
16025 the incorrect result.
16026 Fortunately, we only need
16027 to know about leaf functions once reload has completed, and the
16028 frame size cannot be changed after that time, so we can safely
16029 use the cached value. */
16031 if (reload_completed)
16034 /* Initially this is the size of the local variables. It will be translated
16035 into an offset once we have determined the size of preceding data. */
16036 frame_size = ROUND_UP_WORD (get_frame_size ());
16038 leaf = leaf_function_p ();
16040 /* Space for variadic functions. */
16041 offsets->saved_args = crtl->args.pretend_args_size;
16043 /* In Thumb mode this is incorrect, but never used. */
16044 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16045 arm_compute_static_chain_stack_bytes();
16049 unsigned int regno;
16051 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16052 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16053 saved = core_saved;
16055 /* We know that SP will be doubleword aligned on entry, and we must
16056 preserve that condition at any subroutine call. We also require the
16057 soft frame pointer to be doubleword aligned. */
16059 if (TARGET_REALLY_IWMMXT)
16061 /* Check for the call-saved iWMMXt registers. */
16062 for (regno = FIRST_IWMMXT_REGNUM;
16063 regno <= LAST_IWMMXT_REGNUM;
16065 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16069 func_type = arm_current_func_type ();
16070 /* Space for saved VFP registers. */
16071 if (! IS_VOLATILE (func_type)
16072 && TARGET_HARD_FLOAT && TARGET_VFP)
16073 saved += arm_get_vfp_saved_size ();
16075 else /* TARGET_THUMB1 */
16077 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16078 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16079 saved = core_saved;
16080 if (TARGET_BACKTRACE)
16084 /* Saved registers include the stack frame. */
16085 offsets->saved_regs = offsets->saved_args + saved +
16086 arm_compute_static_chain_stack_bytes();
16087 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16088 /* A leaf function does not need any stack alignment if it has nothing
16089 on the stack. */
16090 if (leaf && frame_size == 0
16091 /* However, if it calls alloca(), we have a dynamically allocated
16092 block of BIGGEST_ALIGNMENT on the stack, so still do stack alignment. */
16093 && ! cfun->calls_alloca)
16095 offsets->outgoing_args = offsets->soft_frame;
16096 offsets->locals_base = offsets->soft_frame;
16100 /* Ensure SFP has the correct alignment. */
16101 if (ARM_DOUBLEWORD_ALIGN
16102 && (offsets->soft_frame & 7))
16104 offsets->soft_frame += 4;
16105 /* Try to align the stack by pushing an extra reg. Don't bother doing this
16106 when there is a stack frame as the alignment will be rolled into
16107 the normal stack adjustment. */
16108 if (frame_size + crtl->outgoing_args_size == 0)
16112 /* If it is safe to use r3, then do so. This sometimes
16113 generates better code on Thumb-2 by avoiding the need to
16114 use 32-bit push/pop instructions. */
16115 if (! any_sibcall_uses_r3 ()
16116 && arm_size_return_regs () <= 12
16117 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16122 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16124 if ((offsets->saved_regs_mask & (1 << i)) == 0)
16133 offsets->saved_regs += 4;
16134 offsets->saved_regs_mask |= (1 << reg);
16139 offsets->locals_base = offsets->soft_frame + frame_size;
16140 offsets->outgoing_args = (offsets->locals_base
16141 + crtl->outgoing_args_size);
16143 if (ARM_DOUBLEWORD_ALIGN)
16145 /* Ensure SP remains doubleword aligned. */
16146 if (offsets->outgoing_args & 7)
16147 offsets->outgoing_args += 4;
16148 gcc_assert (!(offsets->outgoing_args & 7));
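/* A worked example (assumed values, not from the original sources):
   take an ARM-mode function with no pretend args, saved core regs
   {r4, r5, lr}, 16 bytes of locals, 8 bytes of outgoing arguments, no
   static chain, no VFP or iWMMXt saves, and
   CALLER_INTERWORKING_SLOT_SIZE == 0.  Then:

       saved_args    = 0
       saved_regs    = 0 + 12 = 12
       soft_frame    = 12, padded to 16 for doubleword alignment
       locals_base   = 16 + 16 = 32
       outgoing_args = 32 + 8  = 40  (already 8-byte aligned)  */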
16155 /* Calculate the relative offsets for the different stack pointers. Positive
16156 offsets are in the direction of stack growth. */
16159 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16161 arm_stack_offsets *offsets;
16163 offsets = arm_get_frame_offsets ();
16165 /* OK, now we have enough information to compute the distances.
16166 There must be an entry in these switch tables for each pair
16167 of registers in ELIMINABLE_REGS, even if some of the entries
16168 seem to be redundant or useless. */
16171 case ARG_POINTER_REGNUM:
16174 case THUMB_HARD_FRAME_POINTER_REGNUM:
16177 case FRAME_POINTER_REGNUM:
16178 /* This is the reverse of the soft frame pointer
16179 to hard frame pointer elimination below. */
16180 return offsets->soft_frame - offsets->saved_args;
16182 case ARM_HARD_FRAME_POINTER_REGNUM:
16183 /* This is only non-zero in the case where the static chain register
16184 is stored above the frame. */
16185 return offsets->frame - offsets->saved_args - 4;
16187 case STACK_POINTER_REGNUM:
16188 /* If nothing has been pushed on the stack at all
16189 then this will return -4. This *is* correct! */
16190 return offsets->outgoing_args - (offsets->saved_args + 4);
16193 gcc_unreachable ();
16195 gcc_unreachable ();
16197 case FRAME_POINTER_REGNUM:
16200 case THUMB_HARD_FRAME_POINTER_REGNUM:
16203 case ARM_HARD_FRAME_POINTER_REGNUM:
16204 /* The hard frame pointer points to the top entry in the
16205 stack frame. The soft frame pointer to the bottom entry
16206 in the stack frame. If there is no stack frame at all,
16207 then they are identical. */
16209 return offsets->frame - offsets->soft_frame;
16211 case STACK_POINTER_REGNUM:
16212 return offsets->outgoing_args - offsets->soft_frame;
16215 gcc_unreachable ();
16217 gcc_unreachable ();
16220 /* You cannot eliminate from the stack pointer.
16221 In theory you could eliminate from the hard frame
16222 pointer to the stack pointer, but this will never
16223 happen, since if a stack frame is not needed the
16224 hard frame pointer will never be used. */
16225 gcc_unreachable ();
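/* Continuing the worked example above (same assumptions), the
   eliminations come out as:

       ARG_POINTER   -> FRAME_POINTER : soft_frame - saved_args          = 16
       ARG_POINTER   -> STACK_POINTER : outgoing_args - (saved_args + 4) = 36
       FRAME_POINTER -> STACK_POINTER : outgoing_args - soft_frame       = 24  */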
16229 /* Given FROM and TO register numbers, say whether this elimination is
16230 allowed. Frame pointer elimination is automatically handled.
16232 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16233 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16234 pointer, we must eliminate FRAME_POINTER_REGNUM into
16235 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16236 ARG_POINTER_REGNUM. */
16239 arm_can_eliminate (const int from, const int to)
16241 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16242 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16243 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16244 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16248 /* Emit RTL to save coprocessor registers on function entry. Returns the
16249 number of bytes pushed. */
16252 arm_save_coproc_regs(void)
16254 int saved_size = 0;
16256 unsigned start_reg;
16259 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16260 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16262 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16263 insn = gen_rtx_MEM (V2SImode, insn);
16264 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16265 RTX_FRAME_RELATED_P (insn) = 1;
16269 if (TARGET_HARD_FLOAT && TARGET_VFP)
16271 start_reg = FIRST_VFP_REGNUM;
16273 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16275 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16276 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16278 if (start_reg != reg)
16279 saved_size += vfp_emit_fstmd (start_reg,
16280 (reg - start_reg) / 2);
16281 start_reg = reg + 2;
16284 if (start_reg != reg)
16285 saved_size += vfp_emit_fstmd (start_reg,
16286 (reg - start_reg) / 2);
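/* For instance (illustrative): if only d8-d11 are live across calls,
   the loop above finds the single live range starting at s16 and makes
   one vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 4) call, i.e. a single
   store-multiple of four D registers ("vpush {d8-d11}" in effect).  */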
16292 /* Set the Thumb frame pointer from the stack pointer. */
16295 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16297 HOST_WIDE_INT amount;
16300 amount = offsets->outgoing_args - offsets->locals_base;
16302 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16303 stack_pointer_rtx, GEN_INT (amount)));
16306 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16307 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16308 expects the first two operands to be the same. */
16311 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16313 hard_frame_pointer_rtx));
16317 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16318 hard_frame_pointer_rtx,
16319 stack_pointer_rtx));
16321 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16322 plus_constant (Pmode, stack_pointer_rtx, amount));
16323 RTX_FRAME_RELATED_P (dwarf) = 1;
16324 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16327 RTX_FRAME_RELATED_P (insn) = 1;
16330 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16333 arm_expand_prologue (void)
16338 unsigned long live_regs_mask;
16339 unsigned long func_type;
16341 int saved_pretend_args = 0;
16342 int saved_regs = 0;
16343 unsigned HOST_WIDE_INT args_to_push;
16344 arm_stack_offsets *offsets;
16346 func_type = arm_current_func_type ();
16348 /* Naked functions don't have prologues. */
16349 if (IS_NAKED (func_type))
16352 /* Make a copy of crtl->args.pretend_args_size (c_f_p_a_s), as we may need to modify it locally. */
16353 args_to_push = crtl->args.pretend_args_size;
16355 /* Compute which registers we will have to save onto the stack. */
16356 offsets = arm_get_frame_offsets ();
16357 live_regs_mask = offsets->saved_regs_mask;
16359 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16361 if (IS_STACKALIGN (func_type))
16365 /* Handle a word-aligned stack pointer. We generate the following:
16367 mov r0, sp
16368 bic r1, r0, #7
16369 mov sp, r1
16370 <save and restore r0 in normal prologue/epilogue>
16371 mov sp, r0
16372 bx lr
16374 The unwinder doesn't need to know about the stack realignment.
16375 Just tell it we saved SP in r0. */
16376 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16378 r0 = gen_rtx_REG (SImode, 0);
16379 r1 = gen_rtx_REG (SImode, 1);
16381 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16382 RTX_FRAME_RELATED_P (insn) = 1;
16383 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16385 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16387 /* ??? The CFA changes here, which may cause GDB to conclude that it
16388 has entered a different function. That said, the unwind info is
16389 correct, individually, before and after this instruction because
16390 we've described the save of SP, which will override the default
16391 handling of SP as restoring from the CFA. */
16392 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16395 /* For APCS frames, if IP register is clobbered
16396 when creating frame, save that register in a special
16398 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16400 if (IS_INTERRUPT (func_type))
16402 /* Interrupt functions must not corrupt any registers.
16403 Creating a frame pointer however, corrupts the IP
16404 register, so we must push it first. */
16405 emit_multi_reg_push (1 << IP_REGNUM);
16407 /* Do not set RTX_FRAME_RELATED_P on this insn.
16408 The dwarf stack unwinding code only wants to see one
16409 stack decrement per function, and this is not it. If
16410 this instruction is labeled as being part of the frame
16411 creation sequence then dwarf2out_frame_debug_expr will
16412 die when it encounters the assignment of IP to FP
16413 later on, since the use of SP here establishes SP as
16414 the CFA register and not IP.
16416 Anyway this instruction is not really part of the stack
16417 frame creation although it is part of the prologue. */
16419 else if (IS_NESTED (func_type))
16421 /* The Static chain register is the same as the IP register
16422 used as a scratch register during stack frame creation.
16423 To get around this need to find somewhere to store IP
16424 whilst the frame is being created. We try the following
16427 1. The last argument register.
16428 2. A slot on the stack above the frame. (This only
16429 works if the function is not a varargs function).
16430 3. Register r3, after pushing the argument registers
16431 onto the stack.
16433 Note - we only need to tell the dwarf2 backend about the SP
16434 adjustment in the second variant; the static chain register
16435 doesn't need to be unwound, as it doesn't contain a value
16436 inherited from the caller. */
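/* For example (hypothetical case): in a nested, non-varargs function
   where r3 is live on entry, plan 2 applies and IP is saved in the
   slot that arm_compute_static_chain_stack_bytes () reserved above
   the frame.  */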
16438 if (df_regs_ever_live_p (3) == false)
16439 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16440 else if (args_to_push == 0)
16444 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16447 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16448 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16451 /* Just tell the dwarf backend that we adjusted SP. */
16452 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16453 plus_constant (Pmode, stack_pointer_rtx,
16455 RTX_FRAME_RELATED_P (insn) = 1;
16456 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16460 /* Store the args on the stack. */
16461 if (cfun->machine->uses_anonymous_args)
16462 insn = emit_multi_reg_push
16463 ((0xf0 >> (args_to_push / 4)) & 0xf);
16466 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16467 GEN_INT (- args_to_push)));
16469 RTX_FRAME_RELATED_P (insn) = 1;
16471 saved_pretend_args = 1;
16472 fp_offset = args_to_push;
16475 /* Now reuse r3 to preserve IP. */
16476 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16480 insn = emit_set_insn (ip_rtx,
16481 plus_constant (Pmode, stack_pointer_rtx,
16483 RTX_FRAME_RELATED_P (insn) = 1;
16488 /* Push the argument registers, or reserve space for them. */
16489 if (cfun->machine->uses_anonymous_args)
16490 insn = emit_multi_reg_push
16491 ((0xf0 >> (args_to_push / 4)) & 0xf);
16494 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16495 GEN_INT (- args_to_push)));
16496 RTX_FRAME_RELATED_P (insn) = 1;
16499 /* If this is an interrupt service routine, and the link register
16500 is going to be pushed, and we're not generating extra
16501 push of IP (needed when a frame is needed and the frame layout is APCS),
16502 subtracting four from LR now will mean that the function return
16503 can be done with a single instruction. */
16504 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16505 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16506 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16509 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16511 emit_set_insn (lr, plus_constant (SImode, lr, -4));
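/* With LR pre-adjusted like this, the epilogue can return with a single

       ldmfd   sp!, {..., pc}^

   instead of popping LR and then needing a separate SUBS PC, LR, #4
   (an illustration of the intent; the exact epilogue varies).  */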
16514 if (live_regs_mask)
16516 saved_regs += bit_count (live_regs_mask) * 4;
16517 if (optimize_size && !frame_pointer_needed
16518 && saved_regs == offsets->saved_regs - offsets->saved_args)
16520 /* If no coprocessor registers are being pushed and we don't have
16521 to worry about a frame pointer then push extra registers to
16522 create the stack frame. This is done in a way that does not
16523 alter the frame layout, so is independent of the epilogue. */
16527 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16529 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16530 if (frame && n * 4 >= frame)
16533 live_regs_mask |= (1 << n) - 1;
16534 saved_regs += frame;
16537 insn = emit_multi_reg_push (live_regs_mask);
16538 RTX_FRAME_RELATED_P (insn) = 1;
16541 if (! IS_VOLATILE (func_type))
16542 saved_regs += arm_save_coproc_regs ();
16544 if (frame_pointer_needed && TARGET_ARM)
16546 /* Create the new frame pointer. */
16547 if (TARGET_APCS_FRAME)
16549 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16550 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16551 RTX_FRAME_RELATED_P (insn) = 1;
16553 if (IS_NESTED (func_type))
16555 /* Recover the static chain register. */
16556 if (!df_regs_ever_live_p (3)
16557 || saved_pretend_args)
16558 insn = gen_rtx_REG (SImode, 3);
16559 else /* if (crtl->args.pretend_args_size == 0) */
16561 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
16562 insn = gen_frame_mem (SImode, insn);
16564 emit_set_insn (ip_rtx, insn);
16565 /* Add a USE to stop propagate_one_insn() from barfing. */
16566 emit_insn (gen_prologue_use (ip_rtx));
16571 insn = GEN_INT (saved_regs - 4);
16572 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16573 stack_pointer_rtx, insn));
16574 RTX_FRAME_RELATED_P (insn) = 1;
16578 if (flag_stack_usage_info)
16579 current_function_static_stack_size
16580 = offsets->outgoing_args - offsets->saved_args;
16582 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16584 /* This add can produce multiple insns for a large constant, so we
16585 need to get tricky. */
16586 rtx last = get_last_insn ();
16588 amount = GEN_INT (offsets->saved_args + saved_regs
16589 - offsets->outgoing_args);
16591 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16595 last = last ? NEXT_INSN (last) : get_insns ();
16596 RTX_FRAME_RELATED_P (last) = 1;
16598 while (last != insn);
16600 /* If the frame pointer is needed, emit a special barrier that
16601 will prevent the scheduler from moving stores to the frame
16602 before the stack adjustment. */
16603 if (frame_pointer_needed)
16604 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16605 hard_frame_pointer_rtx));
16609 if (frame_pointer_needed && TARGET_THUMB2)
16610 thumb_set_frame_pointer (offsets);
16612 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16614 unsigned long mask;
16616 mask = live_regs_mask;
16617 mask &= THUMB2_WORK_REGS;
16618 if (!IS_NESTED (func_type))
16619 mask |= (1 << IP_REGNUM);
16620 arm_load_pic_register (mask);
16623 /* If we are profiling, make sure no instructions are scheduled before
16624 the call to mcount. Similarly if the user has requested no
16625 scheduling in the prolog. Similarly if we want non-call exceptions
16626 using the EABI unwinder, to prevent faulting instructions from being
16627 swapped with a stack adjustment. */
16628 if (crtl->profile || !TARGET_SCHED_PROLOG
16629 || (arm_except_unwind_info (&global_options) == UI_TARGET
16630 && cfun->can_throw_non_call_exceptions))
16631 emit_insn (gen_blockage ());
16633 /* If the link register is being kept alive, with the return address in it,
16634 then make sure that it does not get reused by the ce2 pass. */
16635 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16636 cfun->machine->lr_save_eliminated = 1;
16639 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16641 arm_print_condition (FILE *stream)
16643 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16645 /* Branch conversion is not implemented for Thumb-2. */
16648 output_operand_lossage ("predicated Thumb instruction");
16651 if (current_insn_predicate != NULL)
16653 output_operand_lossage
16654 ("predicated instruction in conditional sequence");
16658 fputs (arm_condition_codes[arm_current_cc], stream);
16660 else if (current_insn_predicate)
16662 enum arm_cond_code code;
16666 output_operand_lossage ("predicated Thumb instruction");
16670 code = get_arm_condition_code (current_insn_predicate);
16671 fputs (arm_condition_codes[code], stream);
16676 /* If CODE is 'd', then the X is a condition operand and the instruction
16677 should only be executed if the condition is true.
16678 if CODE is 'D', then the X is a condition operand and the instruction
16679 should only be executed if the condition is false: however, if the mode
16680 of the comparison is CCFPEmode, then always execute the instruction -- we
16681 do this because in these circumstances !GE does not necessarily imply LT;
16682 in these cases the instruction pattern will take care to make sure that
16683 an instruction containing %d will follow, thereby undoing the effects of
16684 doing this instruction unconditionally.
16685 If CODE is 'N' then X is a floating point operand that must be negated
16686 before output.
16687 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16688 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16690 arm_print_operand (FILE *stream, rtx x, int code)
16695 fputs (ASM_COMMENT_START, stream);
16699 fputs (user_label_prefix, stream);
16703 fputs (REGISTER_PREFIX, stream);
16707 arm_print_condition (stream);
16711 /* Nothing in unified syntax, otherwise the current condition code. */
16712 if (!TARGET_UNIFIED_ASM)
16713 arm_print_condition (stream);
16717 /* The current condition code in unified syntax, otherwise nothing. */
16718 if (TARGET_UNIFIED_ASM)
16719 arm_print_condition (stream);
16723 /* The current condition code for a condition code setting instruction.
16724 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16725 if (TARGET_UNIFIED_ASM)
16727 fputc('s', stream);
16728 arm_print_condition (stream);
16732 arm_print_condition (stream);
16733 fputc('s', stream);
16738 /* If the instruction is conditionally executed then print
16739 the current condition code, otherwise print 's'. */
16740 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16741 if (current_insn_predicate)
16742 arm_print_condition (stream);
16744 fputc('s', stream);
16747 /* %# is a "break" sequence. It doesn't output anything, but is used to
16748 separate e.g. operand numbers from following text, if that text consists
16749 of further digits which we don't want to be part of the operand
16750 number. */
16757 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16758 r = real_value_negate (&r);
16759 fprintf (stream, "%s", fp_const_from_val (&r));
16763 /* An integer or symbol address without a preceding # sign. */
16765 switch (GET_CODE (x))
16768 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16772 output_addr_const (stream, x);
16776 if (GET_CODE (XEXP (x, 0)) == PLUS
16777 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
16779 output_addr_const (stream, x);
16782 /* Fall through. */
16785 output_operand_lossage ("Unsupported operand for code '%c'", code);
16789 /* An integer that we want to print in HEX. */
16791 switch (GET_CODE (x))
16794 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16798 output_operand_lossage ("Unsupported operand for code '%c'", code);
16803 if (GET_CODE (x) == CONST_INT)
16806 val = ARM_SIGN_EXTEND (~INTVAL (x));
16807 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16811 putc ('~', stream);
16812 output_addr_const (stream, x);
16817 /* The low 16 bits of an immediate constant. */
16818 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16822 fprintf (stream, "%s", arithmetic_instr (x, 1));
16826 fprintf (stream, "%s", arithmetic_instr (x, 0));
16834 if (!shift_operator (x, SImode))
16836 output_operand_lossage ("invalid shift operand");
16840 shift = shift_op (x, &val);
16844 fprintf (stream, ", %s ", shift);
16846 arm_print_operand (stream, XEXP (x, 1), 0);
16848 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16853 /* An explanation of the 'Q', 'R' and 'H' register operands:
16855 In a pair of registers containing a DI or DF value the 'Q'
16856 operand returns the register number of the register containing
16857 the least significant part of the value. The 'R' operand returns
16858 the register number of the register containing the most
16859 significant part of the value.
16861 The 'H' operand returns the higher of the two register numbers.
16862 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16863 same as the 'Q' operand, since the most significant part of the
16864 value is held in the lower number register. The reverse is true
16865 on systems where WORDS_BIG_ENDIAN is false.
16867 The purpose of these operands is to distinguish between cases
16868 where the endian-ness of the values is important (for example
16869 when they are added together), and cases where the endian-ness
16870 is irrelevant, but the order of register operations is important.
16871 For example when loading a value from memory into a register
16872 pair, the endian-ness does not matter. Provided that the value
16873 from the lower memory address is put into the lower numbered
16874 register, and the value from the higher address is put into the
16875 higher numbered register, the load will work regardless of whether
16876 the value being loaded is big-wordian or little-wordian. The
16877 order of the two register loads can matter however, if the address
16878 of the memory location is actually held in one of the registers
16879 being overwritten by the load.
16881 The 'Q' and 'R' constraints are also available for 64-bit
16882 constants. */
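/* A concrete example (assuming a little-endian, little-wordian target):
   for a DImode value held in {r0, r1}, %Q prints r0 (the least
   significant word), %R prints r1 (the most significant word) and %H
   prints r1 (the higher-numbered register).  With WORDS_BIG_ENDIAN,
   %Q and %R swap, while %H still prints r1.  */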
16884 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16886 rtx part = gen_lowpart (SImode, x);
16887 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16891 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16893 output_operand_lossage ("invalid operand for code '%c'", code);
16897 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16901 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16903 enum machine_mode mode = GET_MODE (x);
16906 if (mode == VOIDmode)
16908 part = gen_highpart_mode (SImode, mode, x);
16909 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16913 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16915 output_operand_lossage ("invalid operand for code '%c'", code);
16919 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16923 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16925 output_operand_lossage ("invalid operand for code '%c'", code);
16929 asm_fprintf (stream, "%r", REGNO (x) + 1);
16933 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16935 output_operand_lossage ("invalid operand for code '%c'", code);
16939 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16943 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16945 output_operand_lossage ("invalid operand for code '%c'", code);
16949 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16953 asm_fprintf (stream, "%r",
16954 GET_CODE (XEXP (x, 0)) == REG
16955 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16959 asm_fprintf (stream, "{%r-%r}",
16961 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16964 /* Like 'M', but writing doubleword vector registers, for use by Neon
16965 insns. */
16968 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16969 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16971 asm_fprintf (stream, "{d%d}", regno);
16973 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16978 /* CONST_TRUE_RTX means always -- that's the default. */
16979 if (x == const_true_rtx)
16982 if (!COMPARISON_P (x))
16984 output_operand_lossage ("invalid operand for code '%c'", code);
16988 fputs (arm_condition_codes[get_arm_condition_code (x)],
16993 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16994 want to do that. */
16995 if (x == const_true_rtx)
16997 output_operand_lossage ("instruction never executed");
17000 if (!COMPARISON_P (x))
17002 output_operand_lossage ("invalid operand for code '%c'", code);
17006 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17007 (get_arm_condition_code (x))],
17017 /* Former Maverick support, removed after GCC-4.7. */
17018 output_operand_lossage ("obsolete Maverick format code '%c'", code);
17022 if (GET_CODE (x) != REG
17023 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17024 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17025 /* Bad value for wCG register number. */
17027 output_operand_lossage ("invalid operand for code '%c'", code);
17032 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17035 /* Print an iWMMXt control register name. */
17037 if (GET_CODE (x) != CONST_INT
17039 || INTVAL (x) >= 16)
17040 /* Bad value for wC register number. */
17042 output_operand_lossage ("invalid operand for code '%c'", code);
17048 static const char * wc_reg_names [16] =
17050 "wCID", "wCon", "wCSSF", "wCASF",
17051 "wC4", "wC5", "wC6", "wC7",
17052 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17053 "wC12", "wC13", "wC14", "wC15"
17056 fprintf (stream, wc_reg_names [INTVAL (x)]);
17060 /* Print the high single-precision register of a VFP double-precision
17061 register. */
17064 int mode = GET_MODE (x);
17067 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17069 output_operand_lossage ("invalid operand for code '%c'", code);
17074 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17076 output_operand_lossage ("invalid operand for code '%c'", code);
17080 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17084 /* Print a VFP/Neon double precision or quad precision register name. */
17088 int mode = GET_MODE (x);
17089 int is_quad = (code == 'q');
17092 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17094 output_operand_lossage ("invalid operand for code '%c'", code);
17098 if (GET_CODE (x) != REG
17099 || !IS_VFP_REGNUM (REGNO (x)))
17101 output_operand_lossage ("invalid operand for code '%c'", code);
17106 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17107 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17109 output_operand_lossage ("invalid operand for code '%c'", code);
17113 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17114 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17118 /* These two codes print the low/high doubleword register of a Neon quad
17119 register, respectively. For pair-structure types, can also print
17120 low/high quadword registers. */
17124 int mode = GET_MODE (x);
17127 if ((GET_MODE_SIZE (mode) != 16
17128 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17130 output_operand_lossage ("invalid operand for code '%c'", code);
17135 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17137 output_operand_lossage ("invalid operand for code '%c'", code);
17141 if (GET_MODE_SIZE (mode) == 16)
17142 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17143 + (code == 'f' ? 1 : 0));
17145 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17146 + (code == 'f' ? 1 : 0));
17150 /* Print a VFPv3 floating-point constant, represented as an integer
17151 index. */
17154 int index = vfp3_const_double_index (x);
17155 gcc_assert (index != -1);
17156 fprintf (stream, "%d", index);
17160 /* Print bits representing opcode features for Neon.
17162 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17163 and polynomials as unsigned.
17165 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17167 Bit 2 is 1 for rounding functions, 0 otherwise. */
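/* Worked example (illustrative): INTVAL (x) == 0 selects 'u'
   (unsigned), 1 selects 's' (signed), 2 selects 'p' (polynomial) and
   3 selects 'f' (float) from the first table below; setting bit 2 on
   top of any of these additionally prints "r" for the rounding
   variant of the operation.  */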
17169 /* Identify the type as 's', 'u', 'p' or 'f'. */
17172 HOST_WIDE_INT bits = INTVAL (x);
17173 fputc ("uspf"[bits & 3], stream);
17177 /* Likewise, but signed and unsigned integers are both 'i'. */
17180 HOST_WIDE_INT bits = INTVAL (x);
17181 fputc ("iipf"[bits & 3], stream);
17185 /* As for 'T', but emit 'u' instead of 'p'. */
17188 HOST_WIDE_INT bits = INTVAL (x);
17189 fputc ("usuf"[bits & 3], stream);
17193 /* Bit 2: rounding (vs none). */
17196 HOST_WIDE_INT bits = INTVAL (x);
17197 fputs ((bits & 4) != 0 ? "r" : "", stream);
17201 /* Memory operand for vld1/vst1 instruction. */
17205 bool postinc = FALSE;
17206 unsigned align, memsize, align_bits;
17208 gcc_assert (GET_CODE (x) == MEM);
17209 addr = XEXP (x, 0);
17210 if (GET_CODE (addr) == POST_INC)
17213 addr = XEXP (addr, 0);
17215 asm_fprintf (stream, "[%r", REGNO (addr));
17217 /* We know the alignment of this access, so we can emit a hint in the
17218 instruction (for some alignments) as an aid to the memory subsystem
17219 of the target. */
17220 align = MEM_ALIGN (x) >> 3;
17221 memsize = MEM_SIZE (x);
17223 /* Only certain alignment specifiers are supported by the hardware. */
17224 if (memsize == 32 && (align % 32) == 0)
17226 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
17228 else if (memsize >= 8 && (align % 8) == 0)
17233 if (align_bits != 0)
17234 asm_fprintf (stream, ":%d", align_bits);
17236 asm_fprintf (stream, "]");
17239 fputs("!", stream);
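/* Example output (illustrative): a 16-byte access through r0 with
   128-bit MEM_ALIGN prints "[r0:128]", or "[r0:128]!" when the
   address is a POST_INC; an 8-byte access with only word alignment
   prints plain "[r0]".  */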
17247 gcc_assert (GET_CODE (x) == MEM);
17248 addr = XEXP (x, 0);
17249 gcc_assert (GET_CODE (addr) == REG);
17250 asm_fprintf (stream, "[%r]", REGNO (addr));
17254 /* Translate an S register number into a D register number and element index. */
17257 int mode = GET_MODE (x);
17260 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17262 output_operand_lossage ("invalid operand for code '%c'", code);
17267 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17269 output_operand_lossage ("invalid operand for code '%c'", code);
17273 regno = regno - FIRST_VFP_REGNUM;
17274 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17279 gcc_assert (GET_CODE (x) == CONST_DOUBLE);
17280 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17283 /* Register specifier for vld1.16/vst1.16. Translate the S register
17284 number into a D register number and element index. */
17287 int mode = GET_MODE (x);
17290 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17292 output_operand_lossage ("invalid operand for code '%c'", code);
17297 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17299 output_operand_lossage ("invalid operand for code '%c'", code);
17303 regno = regno - FIRST_VFP_REGNUM;
17304 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17311 output_operand_lossage ("missing operand");
17315 switch (GET_CODE (x))
17318 asm_fprintf (stream, "%r", REGNO (x));
17322 output_memory_reference_mode = GET_MODE (x);
17323 output_address (XEXP (x, 0));
17330 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17331 sizeof (fpstr), 0, 1);
17332 fprintf (stream, "#%s", fpstr);
17335 fprintf (stream, "#%s", fp_immediate_constant (x));
17339 gcc_assert (GET_CODE (x) != NEG);
17340 fputc ('#', stream);
17341 if (GET_CODE (x) == HIGH)
17343 fputs (":lower16:", stream);
17347 output_addr_const (stream, x);
17353 /* Target hook for printing a memory address. */
17355 arm_print_operand_address (FILE *stream, rtx x)
17359 int is_minus = GET_CODE (x) == MINUS;
17361 if (GET_CODE (x) == REG)
17362 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17363 else if (GET_CODE (x) == PLUS || is_minus)
17365 rtx base = XEXP (x, 0);
17366 rtx index = XEXP (x, 1);
17367 HOST_WIDE_INT offset = 0;
17368 if (GET_CODE (base) != REG
17369 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17371 /* Ensure that BASE is a register. */
17372 /* (one of them must be). */
17373 /* Also ensure that the SP is not used as an index register. */
17378 switch (GET_CODE (index))
17381 offset = INTVAL (index);
17384 asm_fprintf (stream, "[%r, #%wd]",
17385 REGNO (base), offset);
17389 asm_fprintf (stream, "[%r, %s%r]",
17390 REGNO (base), is_minus ? "-" : "",
17400 asm_fprintf (stream, "[%r, %s%r",
17401 REGNO (base), is_minus ? "-" : "",
17402 REGNO (XEXP (index, 0)));
17403 arm_print_operand (stream, index, 'S');
17404 fputs ("]", stream);
17409 gcc_unreachable ();
17412 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17413 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17415 extern enum machine_mode output_memory_reference_mode;
17417 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17419 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17420 asm_fprintf (stream, "[%r, #%s%d]!",
17421 REGNO (XEXP (x, 0)),
17422 GET_CODE (x) == PRE_DEC ? "-" : "",
17423 GET_MODE_SIZE (output_memory_reference_mode));
17425 asm_fprintf (stream, "[%r], #%s%d",
17426 REGNO (XEXP (x, 0)),
17427 GET_CODE (x) == POST_DEC ? "-" : "",
17428 GET_MODE_SIZE (output_memory_reference_mode));
17430 else if (GET_CODE (x) == PRE_MODIFY)
17432 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17433 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17434 asm_fprintf (stream, "#%wd]!",
17435 INTVAL (XEXP (XEXP (x, 1), 1)));
17437 asm_fprintf (stream, "%r]!",
17438 REGNO (XEXP (XEXP (x, 1), 1)));
17440 else if (GET_CODE (x) == POST_MODIFY)
17442 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17443 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17444 asm_fprintf (stream, "#%wd",
17445 INTVAL (XEXP (XEXP (x, 1), 1)));
17447 asm_fprintf (stream, "%r",
17448 REGNO (XEXP (XEXP (x, 1), 1)));
17450 else output_addr_const (stream, x);
17454 if (GET_CODE (x) == REG)
17455 asm_fprintf (stream, "[%r]", REGNO (x));
17456 else if (GET_CODE (x) == POST_INC)
17457 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17458 else if (GET_CODE (x) == PLUS)
17460 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17461 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17462 asm_fprintf (stream, "[%r, #%wd]",
17463 REGNO (XEXP (x, 0)),
17464 INTVAL (XEXP (x, 1)));
17466 asm_fprintf (stream, "[%r, %r]",
17467 REGNO (XEXP (x, 0)),
17468 REGNO (XEXP (x, 1)));
17471 output_addr_const (stream, x);
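/* Example renderings in 32-bit mode (illustrative, assuming SImode
   accesses):  (reg r0) -> "[r0, #0]";  (plus (reg r0) (const_int 8))
   -> "[r0, #8]";  (pre_dec (reg r0)) -> "[r0, #-4]!";  (post_inc
   (reg r0)) -> "[r0], #4";  a PRE_MODIFY adding 16 -> "[r0, #16]!".
   Thumb-1 prints a plain register address as "[r0]" instead.  */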
17475 /* Target hook for indicating whether a punctuation character for
17476 TARGET_PRINT_OPERAND is valid. */
17478 arm_print_operand_punct_valid_p (unsigned char code)
17480 return (code == '@' || code == '|' || code == '.'
17481 || code == '(' || code == ')' || code == '#'
17482 || (TARGET_32BIT && (code == '?'))
17483 || (TARGET_THUMB2 && (code == '!'))
17484 || (TARGET_THUMB && (code == '_')));
17487 /* Target hook for assembling integer objects. The ARM version needs to
17488 handle word-sized values specially. */
17490 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17492 enum machine_mode mode;
17494 if (size == UNITS_PER_WORD && aligned_p)
17496 fputs ("\t.word\t", asm_out_file);
17497 output_addr_const (asm_out_file, x);
17499 /* Mark symbols as position independent. We only do this in the
17500 .text segment, not in the .data segment. */
17501 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17502 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17504 /* See legitimize_pic_address for an explanation of the
17505 TARGET_VXWORKS_RTP check. */
17506 if (TARGET_VXWORKS_RTP
17507 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17508 fputs ("(GOT)", asm_out_file);
17510 fputs ("(GOTOFF)", asm_out_file);
17512 fputc ('\n', asm_out_file);
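/* E.g. (illustrative): with -fPIC, a constant-table entry for a
   non-local symbol is emitted as ".word foo(GOT)", while a local
   symbol becomes ".word bar(GOTOFF)".  */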
17516 mode = GET_MODE (x);
17518 if (arm_vector_mode_supported_p (mode))
17522 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17524 units = CONST_VECTOR_NUNITS (x);
17525 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17527 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17528 for (i = 0; i < units; i++)
17530 rtx elt = CONST_VECTOR_ELT (x, i);
17532 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17535 for (i = 0; i < units; i++)
17537 rtx elt = CONST_VECTOR_ELT (x, i);
17538 REAL_VALUE_TYPE rval;
17540 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17543 (rval, GET_MODE_INNER (mode),
17544 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17550 return default_assemble_integer (x, size, aligned_p);
17554 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17558 if (!TARGET_AAPCS_BASED)
17561 default_named_section_asm_out_constructor
17562 : default_named_section_asm_out_destructor) (symbol, priority);
17566 /* Put these in the .init_array section, using a special relocation. */
17567 if (priority != DEFAULT_INIT_PRIORITY)
17570 sprintf (buf, "%s.%.5u",
17571 is_ctor ? ".init_array" : ".fini_array",
17573 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17580 switch_to_section (s);
17581 assemble_align (POINTER_SIZE);
17582 fputs ("\t.word\t", asm_out_file);
17583 output_addr_const (asm_out_file, symbol);
17584 fputs ("(target1)\n", asm_out_file);
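/* For instance, a constructor with priority 101 is placed in section
   ".init_array.00101" (the "%.5u" format above) and emitted as
   "\t.word\t<symbol>(target1)".  */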
17587 /* Add a function to the list of static constructors. */
17590 arm_elf_asm_constructor (rtx symbol, int priority)
17592 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17595 /* Add a function to the list of static destructors. */
17598 arm_elf_asm_destructor (rtx symbol, int priority)
17600 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17603 /* A finite state machine takes care of noticing whether or not instructions
17604 can be conditionally executed, and thus decrease execution time and code
17605 size by deleting branch instructions. The fsm is controlled by
17606 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17608 /* The states of the fsm controlling condition codes are:
17609 0: normal, do nothing special
17610 1: make ASM_OUTPUT_OPCODE not output this instruction
17611 2: make ASM_OUTPUT_OPCODE not output this instruction
17612 3: make instructions conditional
17613 4: make instructions conditional
17615 State transitions (state->state by whom under condition):
17616 0 -> 1 final_prescan_insn if the `target' is a label
17617 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17618 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17619 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17620 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17621 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17622 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17623 (the target insn is arm_target_insn).
17625 If the jump clobbers the conditions then we use states 2 and 4.
17627 A similar thing can be done with conditional return insns.
17629 XXX In case the `target' is an unconditional branch, this conditionalising
17630 of the instructions always reduces code size, but not always execution
17631 time. But then, I want to reduce the code size to somewhere near what
17632 /bin/cc produces. */
17634 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17635 instructions. When a COND_EXEC instruction is seen the subsequent
17636 instructions are scanned so that multiple conditional instructions can be
17637 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17638 specify the length and true/false mask for the IT block. These will be
17639 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
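/* By way of example (not from the original sources), the fsm turns

       cmp   r0, #0
       bne   .L1
       mov   r1, #1
   .L1:

   into "cmp r0, #0" followed by "moveq r1, #1": the branch is
   suppressed (states 1/3) and the skipped insn is executed under the
   inverse of the branch condition.  */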
17641 /* Returns the index of the ARM condition code string in
17642 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17643 COMPARISON should be an rtx like `(eq (...) (...))'. */
17646 maybe_get_arm_condition_code (rtx comparison)
17648 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17649 enum arm_cond_code code;
17650 enum rtx_code comp_code = GET_CODE (comparison);
17652 if (GET_MODE_CLASS (mode) != MODE_CC)
17653 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17654 XEXP (comparison, 1));
17658 case CC_DNEmode: code = ARM_NE; goto dominance;
17659 case CC_DEQmode: code = ARM_EQ; goto dominance;
17660 case CC_DGEmode: code = ARM_GE; goto dominance;
17661 case CC_DGTmode: code = ARM_GT; goto dominance;
17662 case CC_DLEmode: code = ARM_LE; goto dominance;
17663 case CC_DLTmode: code = ARM_LT; goto dominance;
17664 case CC_DGEUmode: code = ARM_CS; goto dominance;
17665 case CC_DGTUmode: code = ARM_HI; goto dominance;
17666 case CC_DLEUmode: code = ARM_LS; goto dominance;
17667 case CC_DLTUmode: code = ARM_CC;
17670 if (comp_code == EQ)
17671 return ARM_INVERSE_CONDITION_CODE (code);
17672 if (comp_code == NE)
17679 case NE: return ARM_NE;
17680 case EQ: return ARM_EQ;
17681 case GE: return ARM_PL;
17682 case LT: return ARM_MI;
17683 default: return ARM_NV;
17689 case NE: return ARM_NE;
17690 case EQ: return ARM_EQ;
17691 default: return ARM_NV;
17697 case NE: return ARM_MI;
17698 case EQ: return ARM_PL;
17699 default: return ARM_NV;
17704 /* We can handle all cases except UNEQ and LTGT. */
17707 case GE: return ARM_GE;
17708 case GT: return ARM_GT;
17709 case LE: return ARM_LS;
17710 case LT: return ARM_MI;
17711 case NE: return ARM_NE;
17712 case EQ: return ARM_EQ;
17713 case ORDERED: return ARM_VC;
17714 case UNORDERED: return ARM_VS;
17715 case UNLT: return ARM_LT;
17716 case UNLE: return ARM_LE;
17717 case UNGT: return ARM_HI;
17718 case UNGE: return ARM_PL;
17719 /* UNEQ and LTGT do not have a representation. */
17720 case UNEQ: /* Fall through. */
17721 case LTGT: /* Fall through. */
17722 default: return ARM_NV;
17728 case NE: return ARM_NE;
17729 case EQ: return ARM_EQ;
17730 case GE: return ARM_LE;
17731 case GT: return ARM_LT;
17732 case LE: return ARM_GE;
17733 case LT: return ARM_GT;
17734 case GEU: return ARM_LS;
17735 case GTU: return ARM_CC;
17736 case LEU: return ARM_CS;
17737 case LTU: return ARM_HI;
17738 default: return ARM_NV;
17744 case LTU: return ARM_CS;
17745 case GEU: return ARM_CC;
17746 default: return ARM_NV;
17752 case NE: return ARM_NE;
17753 case EQ: return ARM_EQ;
17754 case GEU: return ARM_CS;
17755 case GTU: return ARM_HI;
17756 case LEU: return ARM_LS;
17757 case LTU: return ARM_CC;
17758 default: return ARM_NV;
17764 case GE: return ARM_GE;
17765 case LT: return ARM_LT;
17766 case GEU: return ARM_CS;
17767 case LTU: return ARM_CC;
17768 default: return ARM_NV;
17774 case NE: return ARM_NE;
17775 case EQ: return ARM_EQ;
17776 case GE: return ARM_GE;
17777 case GT: return ARM_GT;
17778 case LE: return ARM_LE;
17779 case LT: return ARM_LT;
17780 case GEU: return ARM_CS;
17781 case GTU: return ARM_HI;
17782 case LEU: return ARM_LS;
17783 case LTU: return ARM_CC;
17784 default: return ARM_NV;
17787 default: gcc_unreachable ();
17791 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
17792 static enum arm_cond_code
17793 get_arm_condition_code (rtx comparison)
17795 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
17796 gcc_assert (code != ARM_NV);
17800 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17803 thumb2_final_prescan_insn (rtx insn)
17805 rtx first_insn = insn;
17806 rtx body = PATTERN (insn);
17808 enum arm_cond_code code;
17812 /* Remove the previous insn from the count of insns to be output. */
17813 if (arm_condexec_count)
17814 arm_condexec_count--;
17816 /* Nothing to do if we are already inside a conditional block. */
17817 if (arm_condexec_count)
17820 if (GET_CODE (body) != COND_EXEC)
17823 /* Conditional jumps are implemented directly. */
17824 if (GET_CODE (insn) == JUMP_INSN)
17827 predicate = COND_EXEC_TEST (body);
17828 arm_current_cc = get_arm_condition_code (predicate);
17830 n = get_attr_ce_count (insn);
17831 arm_condexec_count = 1;
17832 arm_condexec_mask = (1 << n) - 1;
17833 arm_condexec_masklen = n;
17834 /* See if subsequent instructions can be combined into the same block. */
17837 insn = next_nonnote_insn (insn);
17839 /* Jumping into the middle of an IT block is illegal, so a label or
17840 barrier terminates the block. */
17841 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
17844 body = PATTERN (insn);
17845 /* USE and CLOBBER aren't really insns, so just skip them. */
17846 if (GET_CODE (body) == USE
17847 || GET_CODE (body) == CLOBBER)
17850 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17851 if (GET_CODE (body) != COND_EXEC)
17853 /* Allow up to 4 conditionally executed instructions in a block. */
17854 n = get_attr_ce_count (insn);
17855 if (arm_condexec_masklen + n > 4)
17858 predicate = COND_EXEC_TEST (body);
17859 code = get_arm_condition_code (predicate);
17860 mask = (1 << n) - 1;
17861 if (arm_current_cc == code)
17862 arm_condexec_mask |= (mask << arm_condexec_masklen);
17863 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17866 arm_condexec_count++;
17867 arm_condexec_masklen += n;
17869 /* A jump must be the last instruction in a conditional block. */
17870 if (GET_CODE (insn) == JUMP_INSN)
17873 /* Restore recog_data (getting the attributes of other insns can
17874 destroy this array, but final.c assumes that it remains intact
17875 across this call). */
17876 extract_constrain_insn_cached (first_insn);
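/* Example (illustrative): two consecutive COND_EXEC insns under EQ and
   one under NE can share a block; arm_condexec_mask then records the
   true/false pattern and the emitted code looks like

       itte  eq
       addeq r0, r0, #1
       addeq r1, r1, #1
       subne r2, r2, #1  */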
17880 arm_final_prescan_insn (rtx insn)
17882 /* BODY will hold the body of INSN. */
17883 rtx body = PATTERN (insn);
17885 /* This will be 1 if trying to repeat the trick, and things need to be
17886 reversed if it appears to fail. */
17889 /* If we start with a return insn, we only succeed if we find another one. */
17890 int seeking_return = 0;
17891 enum rtx_code return_code = UNKNOWN;
17893 /* START_INSN will hold the insn from where we start looking. This is the
17894 first insn after the following code_label if REVERSE is true. */
17895 rtx start_insn = insn;
17897 /* If in state 4, check if the target branch is reached, in order to
17898 change back to state 0. */
17899 if (arm_ccfsm_state == 4)
17901 if (insn == arm_target_insn)
17903 arm_target_insn = NULL;
17904 arm_ccfsm_state = 0;
17909 /* If in state 3, it is possible to repeat the trick, if this insn is an
17910 unconditional branch to a label, and immediately following this branch
17911 is the previous target label which is only used once, and the label this
17912 branch jumps to is not too far off. */
17913 if (arm_ccfsm_state == 3)
17915 if (simplejump_p (insn))
17917 start_insn = next_nonnote_insn (start_insn);
17918 if (GET_CODE (start_insn) == BARRIER)
17920 /* XXX Isn't this always a barrier? */
17921 start_insn = next_nonnote_insn (start_insn);
17923 if (GET_CODE (start_insn) == CODE_LABEL
17924 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17925 && LABEL_NUSES (start_insn) == 1)
17930 else if (ANY_RETURN_P (body))
17931 {
17932 start_insn = next_nonnote_insn (start_insn);
17933 if (GET_CODE (start_insn) == BARRIER)
17934 start_insn = next_nonnote_insn (start_insn);
17935 if (GET_CODE (start_insn) == CODE_LABEL
17936 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17937 && LABEL_NUSES (start_insn) == 1)
17938 {
17939 reverse = TRUE;
17940 seeking_return = 1;
17941 return_code = GET_CODE (body);
17942 }
17943 else
17944 return;
17945 }
17946 else
17947 return;
17948 }
17950 gcc_assert (!arm_ccfsm_state || reverse);
17951 if (GET_CODE (insn) != JUMP_INSN)
17952 return;
17954 /* This jump might be paralleled with a clobber of the condition codes;
17955 the jump should always come first. */
17956 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17957 body = XVECEXP (body, 0, 0);
17959 if (reverse
17960 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17961 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17962 {
17963 int insns_skipped;
17964 int fail = FALSE, succeed = FALSE;
17965 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17966 int then_not_else = TRUE;
17967 rtx this_insn = start_insn, label = 0;
17969 /* Register the insn jumped to. */
17970 if (reverse)
17971 {
17972 if (!seeking_return)
17973 label = XEXP (SET_SRC (body), 0);
17974 }
17975 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17976 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17977 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17978 {
17979 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17980 then_not_else = FALSE;
17981 }
17982 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
17983 {
17984 seeking_return = 1;
17985 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
17986 }
17987 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
17988 {
17989 seeking_return = 1;
17990 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
17991 then_not_else = FALSE;
17992 }
17993 else
17994 gcc_unreachable ();
17996 /* See how many insns this branch skips, and what kind of insns. If all
17997 insns are okay, and the label or unconditional branch to the same
17998 label is not too far away, succeed. */
17999 for (insns_skipped = 0;
18000 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18001 {
18002 rtx scanbody;
18004 this_insn = next_nonnote_insn (this_insn);
18005 if (!this_insn)
18006 break;
18008 switch (GET_CODE (this_insn))
18009 {
18010 case CODE_LABEL:
18011 /* Succeed if it is the target label, otherwise fail since
18012 control falls in from somewhere else. */
18013 if (this_insn == label)
18014 {
18015 arm_ccfsm_state = 1;
18016 succeed = TRUE;
18017 }
18018 else
18019 fail = TRUE;
18020 break;
18022 case BARRIER:
18023 /* Succeed if the following insn is the target label.
18024 Otherwise fail.
18025 If return insns are used then the last insn in a function
18026 will be a barrier. */
18027 this_insn = next_nonnote_insn (this_insn);
18028 if (this_insn && this_insn == label)
18029 {
18030 arm_ccfsm_state = 1;
18031 succeed = TRUE;
18032 }
18033 else
18034 fail = TRUE;
18035 break;
18037 case CALL_INSN:
18038 /* The AAPCS says that conditional calls should not be
18039 used since they make interworking inefficient (the
18040 linker can't transform BL<cond> into BLX). That's
18041 only a problem if the machine has BLX. */
18042 if (arm_arch5)
18043 {
18044 fail = TRUE;
18045 break;
18046 }
18048 /* Succeed if the following insn is the target label, or
18049 if the following two insns are a barrier and the
18050 target label. */
18051 this_insn = next_nonnote_insn (this_insn);
18052 if (this_insn && GET_CODE (this_insn) == BARRIER)
18053 this_insn = next_nonnote_insn (this_insn);
18055 if (this_insn && this_insn == label
18056 && insns_skipped < max_insns_skipped)
18057 {
18058 arm_ccfsm_state = 1;
18059 succeed = TRUE;
18060 }
18061 else
18062 fail = TRUE;
18063 break;
18065 case JUMP_INSN:
18066 /* If this is an unconditional branch to the same label, succeed.
18067 If it is to another label, do nothing. If it is conditional,
18068 fail. */
18069 /* XXX Probably, the tests for SET and the PC are
18070 unnecessary. */
18072 scanbody = PATTERN (this_insn);
18073 if (GET_CODE (scanbody) == SET
18074 && GET_CODE (SET_DEST (scanbody)) == PC)
18076 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18077 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18078 {
18079 arm_ccfsm_state = 2;
18080 succeed = TRUE;
18081 }
18082 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18083 fail = TRUE;
18085 /* Fail if a conditional return is undesirable (e.g. on a
18086 StrongARM), but still allow this if optimizing for size. */
18087 else if (GET_CODE (scanbody) == return_code
18088 && !use_return_insn (TRUE, NULL)
18089 && !optimize_size)
18090 fail = TRUE;
18091 else if (GET_CODE (scanbody) == return_code)
18092 {
18093 arm_ccfsm_state = 2;
18094 succeed = TRUE;
18095 }
18096 else if (GET_CODE (scanbody) == PARALLEL)
18097 {
18098 switch (get_attr_conds (this_insn))
18099 {
18100 case CONDS_NOCOND:
18101 break;
18102 default:
18103 fail = TRUE;
18104 break;
18105 }
18106 }
18107 else
18108 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18110 break;
18112 case INSN:
18113 /* Instructions using or affecting the condition codes make it
18114 fail. */
18115 scanbody = PATTERN (this_insn);
18116 if (!(GET_CODE (scanbody) == SET
18117 || GET_CODE (scanbody) == PARALLEL)
18118 || get_attr_conds (this_insn) != CONDS_NOCOND)
18119 fail = TRUE;
18120 break;
18122 default:
18123 break;
18124 }
18125 }
18126 if (succeed)
18127 {
18128 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18129 arm_target_label = CODE_LABEL_NUMBER (label);
18130 else
18131 {
18132 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18134 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18135 {
18136 this_insn = next_nonnote_insn (this_insn);
18137 gcc_assert (!this_insn
18138 || (GET_CODE (this_insn) != BARRIER
18139 && GET_CODE (this_insn) != CODE_LABEL));
18140 }
18141 if (!this_insn)
18142 {
18143 /* Oh dear!  We ran off the end... give up. */
18144 extract_constrain_insn_cached (insn);
18145 arm_ccfsm_state = 0;
18146 arm_target_insn = NULL;
18147 return;
18148 }
18149 arm_target_insn = this_insn;
18150 }
18152 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18153 what it was. */
18154 if (!reverse)
18155 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18157 if (reverse || then_not_else)
18158 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18159 }
18161 /* Restore recog_data (getting the attributes of other insns can
18162 destroy this array, but final.c assumes that it remains intact
18163 across this call). */
18164 extract_constrain_insn_cached (insn);
18165 }
18166 }
18168 /* Output IT instructions. */
18169 void
18170 thumb2_asm_output_opcode (FILE * stream)
18171 {
18172 char buff[5];
18173 int n;
18175 if (arm_condexec_mask)
18176 {
18177 for (n = 0; n < arm_condexec_masklen; n++)
18178 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18179 buff[n] = 0;
18180 asm_fprintf (stream, "i%s\t%s\n\t", buff,
18181 arm_condition_codes[arm_current_cc]);
18182 arm_condexec_mask = 0;
18183 }
18184 }
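/* Worked example (illustrative values, not from the sources above): with
   arm_current_cc selecting "eq", arm_condexec_masklen == 4 and
   arm_condexec_mask == 0xb (binary 1011, i.e. then/then/else/then),
   BUFF becomes "ttet" and the code above emits

	ittet	eq

   before the first conditional instruction of the block.  */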
18186 /* Returns true if REGNO is a valid register
18187 for holding a quantity of type MODE. */
18188 int
18189 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18190 {
18191 if (GET_MODE_CLASS (mode) == MODE_CC)
18192 return (regno == CC_REGNUM
18193 || (TARGET_HARD_FLOAT && TARGET_VFP
18194 && regno == VFPCC_REGNUM));
18196 if (TARGET_THUMB1)
18197 /* For the Thumb we only allow values bigger than SImode in
18198 registers 0 - 6, so that there is always a second low
18199 register available to hold the upper part of the value.
18200 We probably ought to ensure that the register is the
18201 start of an even numbered register pair. */
18202 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18204 if (TARGET_HARD_FLOAT && TARGET_VFP
18205 && IS_VFP_REGNUM (regno))
18206 {
18207 if (mode == SFmode || mode == SImode)
18208 return VFP_REGNO_OK_FOR_SINGLE (regno);
18210 if (mode == DFmode)
18211 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18213 /* VFP registers can hold HFmode values, but there is no point in
18214 putting them there unless we have hardware conversion insns. */
18215 if (mode == HFmode)
18216 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18218 if (TARGET_NEON)
18219 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18220 || (VALID_NEON_QREG_MODE (mode)
18221 && NEON_REGNO_OK_FOR_QUAD (regno))
18222 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18223 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18224 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18225 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18226 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18228 return FALSE;
18229 }
18231 if (TARGET_REALLY_IWMMXT)
18232 {
18233 if (IS_IWMMXT_GR_REGNUM (regno))
18234 return mode == SImode;
18236 if (IS_IWMMXT_REGNUM (regno))
18237 return VALID_IWMMXT_REG_MODE (mode);
18238 }
18240 /* We allow almost any value to be stored in the general registers.
18241 Restrict doubleword quantities to even register pairs so that we can
18242 use ldrd. Do not allow very large Neon structure opaque modes in
18243 general registers; they would use too many. */
18244 if (regno <= LAST_ARM_REGNUM)
18245 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18246 && ARM_NUM_REGS (mode) <= 4;
18248 if (regno == FRAME_POINTER_REGNUM
18249 || regno == ARG_POINTER_REGNUM)
18250 /* We only allow integers in the fake hard registers. */
18251 return GET_MODE_CLASS (mode) == MODE_INT;
18253 return FALSE;
18254 }
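/* A couple of worked examples for the general-register rule above
   (illustrative, assuming TARGET_LDRD): DImode needs two registers, so it
   is accepted starting at the even register r0 but rejected at the odd
   register r1; CCmode values are only accepted in CC_REGNUM (or
   VFPCC_REGNUM with VFP); XImode would need sixteen core registers,
   which exceeds the limit of four, so it never lands in the general
   registers.  */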
18256 /* Implement MODES_TIEABLE_P. */
18258 bool
18259 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18260 {
18261 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18262 return true;
18264 /* We specifically want to allow elements of "structure" modes to
18265 be tieable to the structure. This more general condition allows
18266 other rarer situations too. */
18267 if (TARGET_NEON
18268 && (VALID_NEON_DREG_MODE (mode1)
18269 || VALID_NEON_QREG_MODE (mode1)
18270 || VALID_NEON_STRUCT_MODE (mode1))
18271 && (VALID_NEON_DREG_MODE (mode2)
18272 || VALID_NEON_QREG_MODE (mode2)
18273 || VALID_NEON_STRUCT_MODE (mode2)))
18274 return true;
18276 return false;
18277 }
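/* For instance (illustrative): V4HImode and V2SImode are both valid Neon
   doubleword-register modes, so the test above makes them tieable when
   TARGET_NEON; likewise a V4SImode vector can be tied to an OImode
   "structure" value, which is the case the comment above is about.  */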
18279 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18280 not used in arm mode. */
18282 enum reg_class
18283 arm_regno_class (int regno)
18284 {
18285 if (TARGET_THUMB1)
18286 {
18287 if (regno == STACK_POINTER_REGNUM)
18288 return STACK_REG;
18289 if (regno == CC_REGNUM)
18290 return CC_REG;
18291 if (regno < 8)
18292 return LO_REGS;
18293 return HI_REGS;
18294 }
18296 if (TARGET_THUMB2 && regno < 8)
18297 return LO_REGS;
18299 if ( regno <= LAST_ARM_REGNUM
18300 || regno == FRAME_POINTER_REGNUM
18301 || regno == ARG_POINTER_REGNUM)
18302 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18304 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18305 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18307 if (IS_VFP_REGNUM (regno))
18309 if (regno <= D7_VFP_REGNUM)
18310 return VFP_D0_D7_REGS;
18311 else if (regno <= LAST_LO_VFP_REGNUM)
18312 return VFP_LO_REGS;
18314 return VFP_HI_REGS;
18315 }
18317 if (IS_IWMMXT_REGNUM (regno))
18318 return IWMMXT_REGS;
18320 if (IS_IWMMXT_GR_REGNUM (regno))
18321 return IWMMXT_GR_REGS;
18326 /* Handle a special case when computing the offset
18327 of an argument from the frame pointer. */
18328 int
18329 arm_debugger_arg_offset (int value, rtx addr)
18330 {
18331 rtx insn;
18333 /* We are only interested if dbxout_parms() failed to compute the offset. */
18334 if (value != 0)
18335 return 0;
18337 /* We can only cope with the case where the address is held in a register. */
18338 if (GET_CODE (addr) != REG)
18339 return 0;
18341 /* If we are using the frame pointer to point at the argument, then
18342 an offset of 0 is correct. */
18343 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18344 return 0;
18346 /* If we are using the stack pointer to point at the
18347 argument, then an offset of 0 is correct. */
18348 /* ??? Check this is consistent with thumb2 frame layout. */
18349 if ((TARGET_THUMB || !frame_pointer_needed)
18350 && REGNO (addr) == SP_REGNUM)
18351 return 0;
18353 /* Oh dear. The argument is pointed to by a register rather
18354 than being held in a register, or being stored at a known
18355 offset from the frame pointer. Since GDB only understands
18356 those two kinds of argument we must translate the address
18357 held in the register into an offset from the frame pointer.
18358 We do this by searching through the insns for the function
18359 looking to see where this register gets its value. If the
18360 register is initialized from the frame pointer plus an offset
18361 then we are in luck and we can continue, otherwise we give up.
18363 This code is exercised by producing debugging information
18364 for a function with arguments like this:
18366 double func (double a, double b, int c, double d) {return d;}
18368 Without this code the stab for parameter 'd' will be set to
18369 an offset of 0 from the frame pointer, rather than 8. */
18371 /* The if() statement says:
18373 If the insn is a normal instruction
18374 and if the insn is setting the value in a register
18375 and if the register being set is the register holding the address of the argument
18376 and if the address is computed by an addition
18377 that involves adding to a register
18378 which is the frame pointer
18379 a constant integer
18381 then... */
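/* For illustration, the loop below matches insns whose pattern has the
   shape (hypothetical operands):

       (set (reg:SI 3)
	    (plus:SI (reg:SI HARD_FRAME_POINTER_REGNUM)
		     (const_int 8)))

   in which case VALUE becomes 8.  */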
18383 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18384 {
18385 if ( GET_CODE (insn) == INSN
18386 && GET_CODE (PATTERN (insn)) == SET
18387 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18388 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18389 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18390 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18391 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18392 )
18393 {
18394 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18395 break;
18396 }
18397 }
18399 if (value == 0)
18400 {
18403 warning (0, "unable to compute real location of stacked parameter");
18404 value = 8; /* XXX magic hack */
18405 }
18407 return value;
18408 }
18410 typedef enum {
18411 T_V8QI,
18412 T_V4HI,
18413 T_V2SI,
18414 T_V2SF,
18415 T_DI,
18416 T_V16QI,
18417 T_V8HI,
18418 T_V4SI,
18419 T_V4SF,
18420 T_V2DI,
18421 T_TI,
18422 T_EI,
18423 T_OI,
18424 T_MAX /* Size of enum. Keep last. */
18425 } neon_builtin_type_mode;
18427 #define TYPE_MODE_BIT(X) (1 << (X))
18429 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18430 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18431 | TYPE_MODE_BIT (T_DI))
18432 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18433 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18434 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18436 #define v8qi_UP T_V8QI
18437 #define v4hi_UP T_V4HI
18438 #define v2si_UP T_V2SI
18439 #define v2sf_UP T_V2SF
18440 #define di_UP T_DI
18441 #define v16qi_UP T_V16QI
18442 #define v8hi_UP T_V8HI
18443 #define v4si_UP T_V4SI
18444 #define v4sf_UP T_V4SF
18445 #define v2di_UP T_V2DI
18446 #define ti_UP T_TI
18447 #define ei_UP T_EI
18448 #define oi_UP T_OI
18450 #define UP(X) X##_UP
18483 NEON_LOADSTRUCTLANE,
18485 NEON_STORESTRUCTLANE,
18492 typedef struct {
18493 const char *name;
18494 const neon_itype itype;
18495 const neon_builtin_type_mode mode;
18496 const enum insn_code code;
18497 unsigned int fcode;
18498 } neon_builtin_datum;
18500 #define CF(N,X) CODE_FOR_neon_##N##X
18502 #define VAR1(T, N, A) \
18503 {#N, NEON_##T, UP (A), CF (N, A), 0}
18504 #define VAR2(T, N, A, B) \
18505 VAR1 (T, N, A), \
18506 {#N, NEON_##T, UP (B), CF (N, B), 0}
18507 #define VAR3(T, N, A, B, C) \
18508 VAR2 (T, N, A, B), \
18509 {#N, NEON_##T, UP (C), CF (N, C), 0}
18510 #define VAR4(T, N, A, B, C, D) \
18511 VAR3 (T, N, A, B, C), \
18512 {#N, NEON_##T, UP (D), CF (N, D), 0}
18513 #define VAR5(T, N, A, B, C, D, E) \
18514 VAR4 (T, N, A, B, C, D), \
18515 {#N, NEON_##T, UP (E), CF (N, E), 0}
18516 #define VAR6(T, N, A, B, C, D, E, F) \
18517 VAR5 (T, N, A, B, C, D, E), \
18518 {#N, NEON_##T, UP (F), CF (N, F), 0}
18519 #define VAR7(T, N, A, B, C, D, E, F, G) \
18520 VAR6 (T, N, A, B, C, D, E, F), \
18521 {#N, NEON_##T, UP (G), CF (N, G), 0}
18522 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18523 VAR7 (T, N, A, B, C, D, E, F, G), \
18524 {#N, NEON_##T, UP (H), CF (N, H), 0}
18525 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18526 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18527 {#N, NEON_##T, UP (I), CF (N, I), 0}
18528 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18529 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18530 {#N, NEON_##T, UP (J), CF (N, J), 0}
18532 /* The mode entries in the following table correspond to the "key" type of the
18533 instruction variant, i.e. equivalent to that which would be specified after
18534 the assembler mnemonic, which usually refers to the last vector operand.
18535 (Signed/unsigned/polynomial types are not differentiated, though; they
18536 are all mapped onto the same mode for a given element size.) The modes
18537 listed per instruction should be the same as those defined for that
18538 instruction's pattern in neon.md. */
18540 static neon_builtin_datum neon_builtin_data[] =
18541 {
18542 VAR10 (BINOP, vadd,
18543 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18544 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18545 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18546 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18547 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18548 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18549 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18550 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18551 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18552 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18553 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18554 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18555 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18556 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18557 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18558 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18559 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18560 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18561 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18562 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18563 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18564 VAR2 (BINOP, vqdmull, v4hi, v2si),
18565 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18566 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18567 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18568 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18569 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18570 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18571 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18572 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18573 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18574 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18575 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18576 VAR10 (BINOP, vsub,
18577 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18578 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18579 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18580 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18581 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18582 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18583 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18584 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18585 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18586 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18587 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18588 VAR2 (BINOP, vcage, v2sf, v4sf),
18589 VAR2 (BINOP, vcagt, v2sf, v4sf),
18590 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18591 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18592 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18593 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18594 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18595 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18596 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18597 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18598 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18599 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18600 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18601 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18602 VAR2 (BINOP, vrecps, v2sf, v4sf),
18603 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18604 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18605 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18606 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18607 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18608 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18609 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18610 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18611 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18612 VAR2 (UNOP, vcnt, v8qi, v16qi),
18613 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18614 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18615 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18616 /* FIXME: vget_lane supports more variants than this! */
18617 VAR10 (GETLANE, vget_lane,
18618 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18619 VAR10 (SETLANE, vset_lane,
18620 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18621 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18622 VAR10 (DUP, vdup_n,
18623 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18624 VAR10 (DUPLANE, vdup_lane,
18625 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18626 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18627 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18628 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18629 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18630 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18631 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18632 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18633 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18634 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18635 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18636 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18637 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18638 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18639 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18640 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18641 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18642 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18643 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18644 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18645 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18646 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18647 VAR10 (BINOP, vext,
18648 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18649 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18650 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18651 VAR2 (UNOP, vrev16, v8qi, v16qi),
18652 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18653 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18654 VAR10 (SELECT, vbsl,
18655 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18656 VAR1 (VTBL, vtbl1, v8qi),
18657 VAR1 (VTBL, vtbl2, v8qi),
18658 VAR1 (VTBL, vtbl3, v8qi),
18659 VAR1 (VTBL, vtbl4, v8qi),
18660 VAR1 (VTBX, vtbx1, v8qi),
18661 VAR1 (VTBX, vtbx2, v8qi),
18662 VAR1 (VTBX, vtbx3, v8qi),
18663 VAR1 (VTBX, vtbx4, v8qi),
18664 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18665 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18666 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18667 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18668 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18669 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18670 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18671 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18672 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18673 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18674 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18675 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18676 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18677 VAR10 (LOAD1, vld1,
18678 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18679 VAR10 (LOAD1LANE, vld1_lane,
18680 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18681 VAR10 (LOAD1, vld1_dup,
18682 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18683 VAR10 (STORE1, vst1,
18684 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18685 VAR10 (STORE1LANE, vst1_lane,
18686 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18687 VAR9 (LOADSTRUCT,
18688 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18689 VAR7 (LOADSTRUCTLANE, vld2_lane,
18690 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18691 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18692 VAR9 (STORESTRUCT, vst2,
18693 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18694 VAR7 (STORESTRUCTLANE, vst2_lane,
18695 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18696 VAR9 (LOADSTRUCT,
18697 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18698 VAR7 (LOADSTRUCTLANE, vld3_lane,
18699 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18700 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18701 VAR9 (STORESTRUCT, vst3,
18702 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18703 VAR7 (STORESTRUCTLANE, vst3_lane,
18704 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18705 VAR9 (LOADSTRUCT, vld4,
18706 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18707 VAR7 (LOADSTRUCTLANE, vld4_lane,
18708 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18709 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18710 VAR9 (STORESTRUCT, vst4,
18711 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18712 VAR7 (STORESTRUCTLANE, vst4_lane,
18713 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18714 VAR10 (LOGICBINOP, vand,
18715 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18716 VAR10 (LOGICBINOP, vorr,
18717 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18718 VAR10 (BINOP, veor,
18719 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18720 VAR10 (LOGICBINOP, vbic,
18721 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18722 VAR10 (LOGICBINOP, vorn,
18723 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18724 };
18738 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18739 symbolic names defined here (which would require too much duplication).
18740 FIXME? */
18741 enum arm_builtins
18742 {
18743 ARM_BUILTIN_GETWCGR0,
18744 ARM_BUILTIN_GETWCGR1,
18745 ARM_BUILTIN_GETWCGR2,
18746 ARM_BUILTIN_GETWCGR3,
18748 ARM_BUILTIN_SETWCGR0,
18749 ARM_BUILTIN_SETWCGR1,
18750 ARM_BUILTIN_SETWCGR2,
18751 ARM_BUILTIN_SETWCGR3,
18755 ARM_BUILTIN_WAVG2BR,
18756 ARM_BUILTIN_WAVG2HR,
18757 ARM_BUILTIN_WAVG2B,
18758 ARM_BUILTIN_WAVG2H,
18765 ARM_BUILTIN_WMACSZ,
18767 ARM_BUILTIN_WMACUZ,
18770 ARM_BUILTIN_WSADBZ,
18772 ARM_BUILTIN_WSADHZ,
18774 ARM_BUILTIN_WALIGNI,
18775 ARM_BUILTIN_WALIGNR0,
18776 ARM_BUILTIN_WALIGNR1,
18777 ARM_BUILTIN_WALIGNR2,
18778 ARM_BUILTIN_WALIGNR3,
18781 ARM_BUILTIN_TMIAPH,
18782 ARM_BUILTIN_TMIABB,
18783 ARM_BUILTIN_TMIABT,
18784 ARM_BUILTIN_TMIATB,
18785 ARM_BUILTIN_TMIATT,
18787 ARM_BUILTIN_TMOVMSKB,
18788 ARM_BUILTIN_TMOVMSKH,
18789 ARM_BUILTIN_TMOVMSKW,
18791 ARM_BUILTIN_TBCSTB,
18792 ARM_BUILTIN_TBCSTH,
18793 ARM_BUILTIN_TBCSTW,
18795 ARM_BUILTIN_WMADDS,
18796 ARM_BUILTIN_WMADDU,
18798 ARM_BUILTIN_WPACKHSS,
18799 ARM_BUILTIN_WPACKWSS,
18800 ARM_BUILTIN_WPACKDSS,
18801 ARM_BUILTIN_WPACKHUS,
18802 ARM_BUILTIN_WPACKWUS,
18803 ARM_BUILTIN_WPACKDUS,
18808 ARM_BUILTIN_WADDSSB,
18809 ARM_BUILTIN_WADDSSH,
18810 ARM_BUILTIN_WADDSSW,
18811 ARM_BUILTIN_WADDUSB,
18812 ARM_BUILTIN_WADDUSH,
18813 ARM_BUILTIN_WADDUSW,
18817 ARM_BUILTIN_WSUBSSB,
18818 ARM_BUILTIN_WSUBSSH,
18819 ARM_BUILTIN_WSUBSSW,
18820 ARM_BUILTIN_WSUBUSB,
18821 ARM_BUILTIN_WSUBUSH,
18822 ARM_BUILTIN_WSUBUSW,
18829 ARM_BUILTIN_WCMPEQB,
18830 ARM_BUILTIN_WCMPEQH,
18831 ARM_BUILTIN_WCMPEQW,
18832 ARM_BUILTIN_WCMPGTUB,
18833 ARM_BUILTIN_WCMPGTUH,
18834 ARM_BUILTIN_WCMPGTUW,
18835 ARM_BUILTIN_WCMPGTSB,
18836 ARM_BUILTIN_WCMPGTSH,
18837 ARM_BUILTIN_WCMPGTSW,
18839 ARM_BUILTIN_TEXTRMSB,
18840 ARM_BUILTIN_TEXTRMSH,
18841 ARM_BUILTIN_TEXTRMSW,
18842 ARM_BUILTIN_TEXTRMUB,
18843 ARM_BUILTIN_TEXTRMUH,
18844 ARM_BUILTIN_TEXTRMUW,
18845 ARM_BUILTIN_TINSRB,
18846 ARM_BUILTIN_TINSRH,
18847 ARM_BUILTIN_TINSRW,
18849 ARM_BUILTIN_WMAXSW,
18850 ARM_BUILTIN_WMAXSH,
18851 ARM_BUILTIN_WMAXSB,
18852 ARM_BUILTIN_WMAXUW,
18853 ARM_BUILTIN_WMAXUH,
18854 ARM_BUILTIN_WMAXUB,
18855 ARM_BUILTIN_WMINSW,
18856 ARM_BUILTIN_WMINSH,
18857 ARM_BUILTIN_WMINSB,
18858 ARM_BUILTIN_WMINUW,
18859 ARM_BUILTIN_WMINUH,
18860 ARM_BUILTIN_WMINUB,
18862 ARM_BUILTIN_WMULUM,
18863 ARM_BUILTIN_WMULSM,
18864 ARM_BUILTIN_WMULUL,
18866 ARM_BUILTIN_PSADBH,
18867 ARM_BUILTIN_WSHUFH,
18881 ARM_BUILTIN_WSLLHI,
18882 ARM_BUILTIN_WSLLWI,
18883 ARM_BUILTIN_WSLLDI,
18884 ARM_BUILTIN_WSRAHI,
18885 ARM_BUILTIN_WSRAWI,
18886 ARM_BUILTIN_WSRADI,
18887 ARM_BUILTIN_WSRLHI,
18888 ARM_BUILTIN_WSRLWI,
18889 ARM_BUILTIN_WSRLDI,
18890 ARM_BUILTIN_WRORHI,
18891 ARM_BUILTIN_WRORWI,
18892 ARM_BUILTIN_WRORDI,
18894 ARM_BUILTIN_WUNPCKIHB,
18895 ARM_BUILTIN_WUNPCKIHH,
18896 ARM_BUILTIN_WUNPCKIHW,
18897 ARM_BUILTIN_WUNPCKILB,
18898 ARM_BUILTIN_WUNPCKILH,
18899 ARM_BUILTIN_WUNPCKILW,
18901 ARM_BUILTIN_WUNPCKEHSB,
18902 ARM_BUILTIN_WUNPCKEHSH,
18903 ARM_BUILTIN_WUNPCKEHSW,
18904 ARM_BUILTIN_WUNPCKEHUB,
18905 ARM_BUILTIN_WUNPCKEHUH,
18906 ARM_BUILTIN_WUNPCKEHUW,
18907 ARM_BUILTIN_WUNPCKELSB,
18908 ARM_BUILTIN_WUNPCKELSH,
18909 ARM_BUILTIN_WUNPCKELSW,
18910 ARM_BUILTIN_WUNPCKELUB,
18911 ARM_BUILTIN_WUNPCKELUH,
18912 ARM_BUILTIN_WUNPCKELUW,
18918 ARM_BUILTIN_WADDSUBHX,
18919 ARM_BUILTIN_WSUBADDHX,
18921 ARM_BUILTIN_WABSDIFFB,
18922 ARM_BUILTIN_WABSDIFFH,
18923 ARM_BUILTIN_WABSDIFFW,
18925 ARM_BUILTIN_WADDCH,
18926 ARM_BUILTIN_WADDCW,
18929 ARM_BUILTIN_WAVG4R,
18931 ARM_BUILTIN_WMADDSX,
18932 ARM_BUILTIN_WMADDUX,
18934 ARM_BUILTIN_WMADDSN,
18935 ARM_BUILTIN_WMADDUN,
18937 ARM_BUILTIN_WMULWSM,
18938 ARM_BUILTIN_WMULWUM,
18940 ARM_BUILTIN_WMULWSMR,
18941 ARM_BUILTIN_WMULWUMR,
18943 ARM_BUILTIN_WMULWL,
18945 ARM_BUILTIN_WMULSMR,
18946 ARM_BUILTIN_WMULUMR,
18948 ARM_BUILTIN_WQMULM,
18949 ARM_BUILTIN_WQMULMR,
18951 ARM_BUILTIN_WQMULWM,
18952 ARM_BUILTIN_WQMULWMR,
18954 ARM_BUILTIN_WADDBHUSM,
18955 ARM_BUILTIN_WADDBHUSL,
18957 ARM_BUILTIN_WQMIABB,
18958 ARM_BUILTIN_WQMIABT,
18959 ARM_BUILTIN_WQMIATB,
18960 ARM_BUILTIN_WQMIATT,
18962 ARM_BUILTIN_WQMIABBN,
18963 ARM_BUILTIN_WQMIABTN,
18964 ARM_BUILTIN_WQMIATBN,
18965 ARM_BUILTIN_WQMIATTN,
18967 ARM_BUILTIN_WMIABB,
18968 ARM_BUILTIN_WMIABT,
18969 ARM_BUILTIN_WMIATB,
18970 ARM_BUILTIN_WMIATT,
18972 ARM_BUILTIN_WMIABBN,
18973 ARM_BUILTIN_WMIABTN,
18974 ARM_BUILTIN_WMIATBN,
18975 ARM_BUILTIN_WMIATTN,
18977 ARM_BUILTIN_WMIAWBB,
18978 ARM_BUILTIN_WMIAWBT,
18979 ARM_BUILTIN_WMIAWTB,
18980 ARM_BUILTIN_WMIAWTT,
18982 ARM_BUILTIN_WMIAWBBN,
18983 ARM_BUILTIN_WMIAWBTN,
18984 ARM_BUILTIN_WMIAWTBN,
18985 ARM_BUILTIN_WMIAWTTN,
18987 ARM_BUILTIN_WMERGE,
18989 ARM_BUILTIN_THREAD_POINTER,
18991 ARM_BUILTIN_NEON_BASE,
18993 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
18994 };
18996 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
18998 static void
18999 arm_init_neon_builtins (void)
19000 {
19001 unsigned int i, fcode;
19004 tree neon_intQI_type_node;
19005 tree neon_intHI_type_node;
19006 tree neon_polyQI_type_node;
19007 tree neon_polyHI_type_node;
19008 tree neon_intSI_type_node;
19009 tree neon_intDI_type_node;
19010 tree neon_float_type_node;
19012 tree intQI_pointer_node;
19013 tree intHI_pointer_node;
19014 tree intSI_pointer_node;
19015 tree intDI_pointer_node;
19016 tree float_pointer_node;
19018 tree const_intQI_node;
19019 tree const_intHI_node;
19020 tree const_intSI_node;
19021 tree const_intDI_node;
19022 tree const_float_node;
19024 tree const_intQI_pointer_node;
19025 tree const_intHI_pointer_node;
19026 tree const_intSI_pointer_node;
19027 tree const_intDI_pointer_node;
19028 tree const_float_pointer_node;
19030 tree V8QI_type_node;
19031 tree V4HI_type_node;
19032 tree V2SI_type_node;
19033 tree V2SF_type_node;
19034 tree V16QI_type_node;
19035 tree V8HI_type_node;
19036 tree V4SI_type_node;
19037 tree V4SF_type_node;
19038 tree V2DI_type_node;
19040 tree intUQI_type_node;
19041 tree intUHI_type_node;
19042 tree intUSI_type_node;
19043 tree intUDI_type_node;
19045 tree intEI_type_node;
19046 tree intOI_type_node;
19047 tree intCI_type_node;
19048 tree intXI_type_node;
19050 tree V8QI_pointer_node;
19051 tree V4HI_pointer_node;
19052 tree V2SI_pointer_node;
19053 tree V2SF_pointer_node;
19054 tree V16QI_pointer_node;
19055 tree V8HI_pointer_node;
19056 tree V4SI_pointer_node;
19057 tree V4SF_pointer_node;
19058 tree V2DI_pointer_node;
19060 tree void_ftype_pv8qi_v8qi_v8qi;
19061 tree void_ftype_pv4hi_v4hi_v4hi;
19062 tree void_ftype_pv2si_v2si_v2si;
19063 tree void_ftype_pv2sf_v2sf_v2sf;
19064 tree void_ftype_pdi_di_di;
19065 tree void_ftype_pv16qi_v16qi_v16qi;
19066 tree void_ftype_pv8hi_v8hi_v8hi;
19067 tree void_ftype_pv4si_v4si_v4si;
19068 tree void_ftype_pv4sf_v4sf_v4sf;
19069 tree void_ftype_pv2di_v2di_v2di;
19071 tree reinterp_ftype_dreg[5][5];
19072 tree reinterp_ftype_qreg[5][5];
19073 tree dreg_types[5], qreg_types[5];
19075 /* Create distinguished type nodes for NEON vector element types,
19076 and pointers to values of such types, so we can detect them later. */
19077 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19078 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19079 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19080 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19081 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19082 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19083 neon_float_type_node = make_node (REAL_TYPE);
19084 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19085 layout_type (neon_float_type_node);
19087 /* Define typedefs which exactly correspond to the modes we are basing vector
19088 types on. If you change these names you'll need to change
19089 the table used by arm_mangle_type too. */
19090 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19091 "__builtin_neon_qi");
19092 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19093 "__builtin_neon_hi");
19094 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19095 "__builtin_neon_si");
19096 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19097 "__builtin_neon_sf");
19098 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19099 "__builtin_neon_di");
19100 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19101 "__builtin_neon_poly8");
19102 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19103 "__builtin_neon_poly16");
19105 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19106 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19107 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19108 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19109 float_pointer_node = build_pointer_type (neon_float_type_node);
19111 /* Next create constant-qualified versions of the above types. */
19112 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19113 TYPE_QUAL_CONST);
19114 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19115 TYPE_QUAL_CONST);
19116 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19117 TYPE_QUAL_CONST);
19118 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19119 TYPE_QUAL_CONST);
19120 const_float_node = build_qualified_type (neon_float_type_node,
19121 TYPE_QUAL_CONST);
19123 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19124 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19125 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19126 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19127 const_float_pointer_node = build_pointer_type (const_float_node);
19129 /* Now create vector types based on our NEON element types. */
19130 /* 64-bit vectors. */
19131 V8QI_type_node =
19132 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19133 V4HI_type_node =
19134 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19135 V2SI_type_node =
19136 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19137 V2SF_type_node =
19138 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19139 /* 128-bit vectors. */
19140 V16QI_type_node =
19141 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19142 V8HI_type_node =
19143 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19144 V4SI_type_node =
19145 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19146 V4SF_type_node =
19147 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19148 V2DI_type_node =
19149 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19151 /* Unsigned integer types for various mode sizes. */
19152 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19153 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19154 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19155 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19157 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19158 "__builtin_neon_uqi");
19159 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19160 "__builtin_neon_uhi");
19161 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19162 "__builtin_neon_usi");
19163 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19164 "__builtin_neon_udi");
19166 /* Opaque integer types for structures of vectors. */
19167 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19168 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19169 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19170 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19172 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19173 "__builtin_neon_ti");
19174 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19175 "__builtin_neon_ei");
19176 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19177 "__builtin_neon_oi");
19178 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19179 "__builtin_neon_ci");
19180 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19181 "__builtin_neon_xi");
19183 /* Pointers to vector types. */
19184 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19185 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19186 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19187 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19188 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19189 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19190 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19191 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19192 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19194 /* Operations which return results as pairs. */
19195 void_ftype_pv8qi_v8qi_v8qi =
19196 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19197 V8QI_type_node, NULL);
19198 void_ftype_pv4hi_v4hi_v4hi =
19199 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19200 V4HI_type_node, NULL);
19201 void_ftype_pv2si_v2si_v2si =
19202 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19203 V2SI_type_node, NULL);
19204 void_ftype_pv2sf_v2sf_v2sf =
19205 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19206 V2SF_type_node, NULL);
19207 void_ftype_pdi_di_di =
19208 build_function_type_list (void_type_node, intDI_pointer_node,
19209 neon_intDI_type_node, neon_intDI_type_node, NULL);
19210 void_ftype_pv16qi_v16qi_v16qi =
19211 build_function_type_list (void_type_node, V16QI_pointer_node,
19212 V16QI_type_node, V16QI_type_node, NULL);
19213 void_ftype_pv8hi_v8hi_v8hi =
19214 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19215 V8HI_type_node, NULL);
19216 void_ftype_pv4si_v4si_v4si =
19217 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19218 V4SI_type_node, NULL);
19219 void_ftype_pv4sf_v4sf_v4sf =
19220 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19221 V4SF_type_node, NULL);
19222 void_ftype_pv2di_v2di_v2di =
19223 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19224 V2DI_type_node, NULL);
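/* These signatures serve the NEON_RESULTPAIR builtins, which write a pair
   of result vectors through the pointer argument; e.g. (illustrative, in C
   terms) __builtin_neon_vtrnv8qi gets the type void (v8qi *, v8qi, v8qi)
   via the NEON_RESULTPAIR case below.  */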
19226 dreg_types[0] = V8QI_type_node;
19227 dreg_types[1] = V4HI_type_node;
19228 dreg_types[2] = V2SI_type_node;
19229 dreg_types[3] = V2SF_type_node;
19230 dreg_types[4] = neon_intDI_type_node;
19232 qreg_types[0] = V16QI_type_node;
19233 qreg_types[1] = V8HI_type_node;
19234 qreg_types[2] = V4SI_type_node;
19235 qreg_types[3] = V4SF_type_node;
19236 qreg_types[4] = V2DI_type_node;
19238 for (i = 0; i < 5; i++)
19239 {
19240 int j;
19241 for (j = 0; j < 5; j++)
19242 {
19243 reinterp_ftype_dreg[i][j]
19244 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19245 reinterp_ftype_qreg[i][j]
19246 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19247 }
19248 }
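/* E.g. reinterp_ftype_dreg[0][3] is built as "V8QI (V2SF)", the signature
   later given to __builtin_neon_vreinterpretv8qiv2sf by the NEON_REINTERP
   case below.  */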
19250 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19251 i < ARRAY_SIZE (neon_builtin_data);
19252 i++, fcode++)
19253 {
19254 neon_builtin_datum *d = &neon_builtin_data[i];
19256 const char* const modenames[] = {
19257 "v8qi", "v4hi", "v2si", "v2sf", "di",
19258 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19263 int is_load = 0, is_store = 0;
19265 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19267 switch (d->itype)
19268 {
19271 case NEON_LOAD1:
19272 case NEON_LOAD1LANE:
19273 case NEON_LOADSTRUCT:
19274 case NEON_LOADSTRUCTLANE:
19275 is_load = 1;
19276 /* Fall through. */
19277 case NEON_STORE1:
19278 case NEON_STORE1LANE:
19279 case NEON_STORESTRUCT:
19280 case NEON_STORESTRUCTLANE:
19281 is_store = 1;
19283 /* Fall through. */
19284 case NEON_UNOP:
19285 case NEON_BINOP:
19286 case NEON_LOGICBINOP:
19287 case NEON_SHIFTINSERT:
19294 case NEON_SHIFTIMM:
19295 case NEON_SHIFTACC:
19301 case NEON_LANEMULL:
19302 case NEON_LANEMULH:
19304 case NEON_SCALARMUL:
19305 case NEON_SCALARMULL:
19306 case NEON_SCALARMULH:
19307 case NEON_SCALARMAC:
19311 {
19312 int k;
19313 tree return_type = void_type_node, args = void_list_node;
19315 /* Build a function type directly from the insn_data for
19316 this builtin. The build_function_type() function takes
19317 care of removing duplicates for us. */
19318 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19319 {
19320 tree eltype;
19322 if (is_load && k == 1)
19324 /* Neon load patterns always have the memory
19325 operand in the operand 1 position. */
19326 gcc_assert (insn_data[d->code].operand[k].predicate
19327 == neon_struct_operand);
19333 eltype = const_intQI_pointer_node;
19338 eltype = const_intHI_pointer_node;
19343 eltype = const_intSI_pointer_node;
19348 eltype = const_float_pointer_node;
19353 eltype = const_intDI_pointer_node;
19356 default: gcc_unreachable ();
19359 else if (is_store && k == 0)
19361 /* Similarly, Neon store patterns use operand 0 as
19362 the memory location to store to. */
19363 gcc_assert (insn_data[d->code].operand[k].predicate
19364 == neon_struct_operand);
19370 eltype = intQI_pointer_node;
19375 eltype = intHI_pointer_node;
19380 eltype = intSI_pointer_node;
19385 eltype = float_pointer_node;
19390 eltype = intDI_pointer_node;
19393 default: gcc_unreachable ();
19398 switch (insn_data[d->code].operand[k].mode)
19400 case VOIDmode: eltype = void_type_node; break;
19402 case QImode: eltype = neon_intQI_type_node; break;
19403 case HImode: eltype = neon_intHI_type_node; break;
19404 case SImode: eltype = neon_intSI_type_node; break;
19405 case SFmode: eltype = neon_float_type_node; break;
19406 case DImode: eltype = neon_intDI_type_node; break;
19407 case TImode: eltype = intTI_type_node; break;
19408 case EImode: eltype = intEI_type_node; break;
19409 case OImode: eltype = intOI_type_node; break;
19410 case CImode: eltype = intCI_type_node; break;
19411 case XImode: eltype = intXI_type_node; break;
19412 /* 64-bit vectors. */
19413 case V8QImode: eltype = V8QI_type_node; break;
19414 case V4HImode: eltype = V4HI_type_node; break;
19415 case V2SImode: eltype = V2SI_type_node; break;
19416 case V2SFmode: eltype = V2SF_type_node; break;
19417 /* 128-bit vectors. */
19418 case V16QImode: eltype = V16QI_type_node; break;
19419 case V8HImode: eltype = V8HI_type_node; break;
19420 case V4SImode: eltype = V4SI_type_node; break;
19421 case V4SFmode: eltype = V4SF_type_node; break;
19422 case V2DImode: eltype = V2DI_type_node; break;
19423 default: gcc_unreachable ();
19427 if (k == 0 && !is_store)
19428 return_type = eltype;
19430 args = tree_cons (NULL_TREE, eltype, args);
19433 ftype = build_function_type (return_type, args);
19437 case NEON_RESULTPAIR:
19439 switch (insn_data[d->code].operand[1].mode)
19441 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19442 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19443 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19444 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19445 case DImode: ftype = void_ftype_pdi_di_di; break;
19446 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19447 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19448 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19449 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19450 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19451 default: gcc_unreachable ();
19456 case NEON_REINTERP:
19457 {
19458 /* We iterate over 5 doubleword types, then 5 quadword
19459 types. */
19460 int rhs = d->mode % 5;
19461 switch (insn_data[d->code].operand[0].mode)
19463 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19464 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19465 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19466 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19467 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19468 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19469 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19470 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19471 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19472 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19473 default: gcc_unreachable ();
19479 gcc_unreachable ();
19482 gcc_assert (ftype != NULL);
19484 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19486 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19487 NULL_TREE);
19488 arm_builtin_decls[fcode] = decl;
19489 }
19490 }
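/* So, for example, the VAR10 (BINOP, vadd, ...) entry with mode T_V8QI is
   registered as __builtin_neon_vaddv8qi, with function code
   ARM_BUILTIN_NEON_BASE plus its index in neon_builtin_data.  */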
19492 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19493 do \
19494 { \
19495 if ((MASK) & insn_flags) \
19496 { \
19497 tree bdecl; \
19498 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19499 BUILT_IN_MD, NULL, NULL_TREE); \
19500 arm_builtin_decls[CODE] = bdecl; \
19501 } \
19502 } \
19503 while (0)
19505 struct builtin_description
19506 {
19507 const unsigned int mask;
19508 const enum insn_code icode;
19509 const char * const name;
19510 const enum arm_builtins code;
19511 const enum rtx_code comparison;
19512 const unsigned int flag;
19513 };
19515 static const struct builtin_description bdesc_2arg[] =
19516 {
19517 #define IWMMXT_BUILTIN(code, string, builtin) \
19518 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19519 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19521 #define IWMMXT2_BUILTIN(code, string, builtin) \
19522 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
19523 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
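/* For example, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) below expands to
   the entry

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   i.e. the builtin is only usable when the target supports iWMMXt.  */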
19525 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19526 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19527 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19528 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19529 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19530 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19531 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19532 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19533 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19534 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19535 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19536 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19537 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19538 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19539 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19540 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19541 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19542 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19543 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19544 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19545 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19546 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19547 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19548 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19549 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19550 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19551 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19552 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19553 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19554 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19555 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19556 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19557 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19558 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19559 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19560 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19561 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19562 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19563 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19564 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19565 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19566 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19567 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19568 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19569 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19570 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19571 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19572 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19573 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19574 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19575 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19576 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19577 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19578 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19579 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19580 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19581 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
19582 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
19583 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
19584 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
19585 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
19586 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
19587 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
19588 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
19589 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
19590 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
19591 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
19592 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
19593 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
19594 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
19595 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
19596 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
19597 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
19598 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
19599 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
19600 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
19601 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
19602 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
19604 #define IWMMXT_BUILTIN2(code, builtin) \
19605 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19607 #define IWMMXT2_BUILTIN2(code, builtin) \
19608 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19610 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
19611 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
19612 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19613 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19614 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19615 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19616 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19617 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19618 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19619 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19620 };
19622 static const struct builtin_description bdesc_1arg[] =
19623 {
19624 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19625 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19626 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19627 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19628 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19629 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19630 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19631 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19632 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19633 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19634 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19635 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19636 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19637 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19638 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19639 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19640 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19641 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19642 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
19643 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
19644 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
19645 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
19646 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
19647 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
19648 };
19650 /* Set up all the iWMMXt builtins. This is not called if
19651 TARGET_IWMMXT is zero. */
19653 static void
19654 arm_init_iwmmxt_builtins (void)
19655 {
19656 const struct builtin_description * d;
19657 size_t i;
19659 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19660 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19661 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19663 tree v8qi_ftype_v8qi_v8qi_int
19664 = build_function_type_list (V8QI_type_node,
19665 V8QI_type_node, V8QI_type_node,
19666 integer_type_node, NULL_TREE);
19667 tree v4hi_ftype_v4hi_int
19668 = build_function_type_list (V4HI_type_node,
19669 V4HI_type_node, integer_type_node, NULL_TREE);
19670 tree v2si_ftype_v2si_int
19671 = build_function_type_list (V2SI_type_node,
19672 V2SI_type_node, integer_type_node, NULL_TREE);
19673 tree v2si_ftype_di_di
19674 = build_function_type_list (V2SI_type_node,
19675 long_long_integer_type_node,
19676 long_long_integer_type_node,
19677 NULL_TREE);
19678 tree di_ftype_di_int
19679 = build_function_type_list (long_long_integer_type_node,
19680 long_long_integer_type_node,
19681 integer_type_node, NULL_TREE);
19682 tree di_ftype_di_int_int
19683 = build_function_type_list (long_long_integer_type_node,
19684 long_long_integer_type_node,
19685 integer_type_node,
19686 integer_type_node, NULL_TREE);
19687 tree int_ftype_v8qi
19688 = build_function_type_list (integer_type_node,
19689 V8QI_type_node, NULL_TREE);
19690 tree int_ftype_v4hi
19691 = build_function_type_list (integer_type_node,
19692 V4HI_type_node, NULL_TREE);
19693 tree int_ftype_v2si
19694 = build_function_type_list (integer_type_node,
19695 V2SI_type_node, NULL_TREE);
19696 tree int_ftype_v8qi_int
19697 = build_function_type_list (integer_type_node,
19698 V8QI_type_node, integer_type_node, NULL_TREE);
19699 tree int_ftype_v4hi_int
19700 = build_function_type_list (integer_type_node,
19701 V4HI_type_node, integer_type_node, NULL_TREE);
19702 tree int_ftype_v2si_int
19703 = build_function_type_list (integer_type_node,
19704 V2SI_type_node, integer_type_node, NULL_TREE);
19705 tree v8qi_ftype_v8qi_int_int
19706 = build_function_type_list (V8QI_type_node,
19707 V8QI_type_node, integer_type_node,
19708 integer_type_node, NULL_TREE);
19709 tree v4hi_ftype_v4hi_int_int
19710 = build_function_type_list (V4HI_type_node,
19711 V4HI_type_node, integer_type_node,
19712 integer_type_node, NULL_TREE);
19713 tree v2si_ftype_v2si_int_int
19714 = build_function_type_list (V2SI_type_node,
19715 V2SI_type_node, integer_type_node,
19716 integer_type_node, NULL_TREE);
19717 /* Miscellaneous. */
19718 tree v8qi_ftype_v4hi_v4hi
19719 = build_function_type_list (V8QI_type_node,
19720 V4HI_type_node, V4HI_type_node, NULL_TREE);
19721 tree v4hi_ftype_v2si_v2si
19722 = build_function_type_list (V4HI_type_node,
19723 V2SI_type_node, V2SI_type_node, NULL_TREE);
19724 tree v8qi_ftype_v4hi_v8qi
19725 = build_function_type_list (V8QI_type_node,
19726 V4HI_type_node, V8QI_type_node, NULL_TREE);
19727 tree v2si_ftype_v4hi_v4hi
19728 = build_function_type_list (V2SI_type_node,
19729 V4HI_type_node, V4HI_type_node, NULL_TREE);
19730 tree v2si_ftype_v8qi_v8qi
19731 = build_function_type_list (V2SI_type_node,
19732 V8QI_type_node, V8QI_type_node, NULL_TREE);
19733 tree v4hi_ftype_v4hi_di
19734 = build_function_type_list (V4HI_type_node,
19735 V4HI_type_node, long_long_integer_type_node,
19736 NULL_TREE);
19737 tree v2si_ftype_v2si_di
19738 = build_function_type_list (V2SI_type_node,
19739 V2SI_type_node, long_long_integer_type_node,
19740 NULL_TREE);
19741 tree di_ftype_void
19742 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
19743 tree int_ftype_void
19744 = build_function_type_list (integer_type_node, NULL_TREE);
19745 tree di_ftype_v8qi
19746 = build_function_type_list (long_long_integer_type_node,
19747 V8QI_type_node, NULL_TREE);
19748 tree di_ftype_v4hi
19749 = build_function_type_list (long_long_integer_type_node,
19750 V4HI_type_node, NULL_TREE);
19751 tree di_ftype_v2si
19752 = build_function_type_list (long_long_integer_type_node,
19753 V2SI_type_node, NULL_TREE);
19754 tree v2si_ftype_v4hi
19755 = build_function_type_list (V2SI_type_node,
19756 V4HI_type_node, NULL_TREE);
19757 tree v4hi_ftype_v8qi
19758 = build_function_type_list (V4HI_type_node,
19759 V8QI_type_node, NULL_TREE);
19760 tree v8qi_ftype_v8qi
19761 = build_function_type_list (V8QI_type_node,
19762 V8QI_type_node, NULL_TREE);
19763 tree v4hi_ftype_v4hi
19764 = build_function_type_list (V4HI_type_node,
19765 V4HI_type_node, NULL_TREE);
19766 tree v2si_ftype_v2si
19767 = build_function_type_list (V2SI_type_node,
19768 V2SI_type_node, NULL_TREE);
19770 tree di_ftype_di_v4hi_v4hi
19771 = build_function_type_list (long_long_unsigned_type_node,
19772 long_long_unsigned_type_node,
19773 V4HI_type_node, V4HI_type_node,
19774 NULL_TREE);
19776 tree di_ftype_v4hi_v4hi
19777 = build_function_type_list (long_long_unsigned_type_node,
19778 V4HI_type_node,V4HI_type_node,
19781 tree v2si_ftype_v2si_v4hi_v4hi
19782 = build_function_type_list (V2SI_type_node,
19783 V2SI_type_node, V4HI_type_node,
19784 V4HI_type_node, NULL_TREE);
19786 tree v2si_ftype_v2si_v8qi_v8qi
19787 = build_function_type_list (V2SI_type_node,
19788 V2SI_type_node, V8QI_type_node,
19789 V8QI_type_node, NULL_TREE);
19791 tree di_ftype_di_v2si_v2si
19792 = build_function_type_list (long_long_unsigned_type_node,
19793 long_long_unsigned_type_node,
19794 V2SI_type_node, V2SI_type_node,
19797 tree di_ftype_di_di_int
19798 = build_function_type_list (long_long_unsigned_type_node,
19799 long_long_unsigned_type_node,
19800 long_long_unsigned_type_node,
19801 integer_type_node, NULL_TREE);
19803 tree void_ftype_int
19804 = build_function_type_list (void_type_node,
19805 integer_type_node, NULL_TREE);
19807 tree v8qi_ftype_char
19808 = build_function_type_list (V8QI_type_node,
19809 signed_char_type_node, NULL_TREE);
19811 tree v4hi_ftype_short
19812 = build_function_type_list (V4HI_type_node,
19813 short_integer_type_node, NULL_TREE);
19815 tree v2si_ftype_int
19816 = build_function_type_list (V2SI_type_node,
19817 integer_type_node, NULL_TREE);
19819 /* Normal vector binops. */
19820 tree v8qi_ftype_v8qi_v8qi
19821 = build_function_type_list (V8QI_type_node,
19822 V8QI_type_node, V8QI_type_node, NULL_TREE);
19823 tree v4hi_ftype_v4hi_v4hi
19824 = build_function_type_list (V4HI_type_node,
19825 V4HI_type_node,V4HI_type_node, NULL_TREE);
19826 tree v2si_ftype_v2si_v2si
19827 = build_function_type_list (V2SI_type_node,
19828 V2SI_type_node, V2SI_type_node, NULL_TREE);
19829 tree di_ftype_di_di
19830 = build_function_type_list (long_long_unsigned_type_node,
19831 long_long_unsigned_type_node,
19832 long_long_unsigned_type_node,
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
#define iwmmx_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)

#define iwmmx2_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)
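
/* As an illustration (not part of the original sources): the macro simply
   pastes its arguments together, so

     iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);

   expands to

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);

   i.e. each entry below registers one "__builtin_arm_*" function, gated
   on the FL_IWMMXT or FL_IWMMXT2 feature flag.  */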

  iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
  iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
  iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
  iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
  iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
  iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
  iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
  iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
  iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);

  iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
  iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
  iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
  iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
  iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
  iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);

  iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
  iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
  iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
  iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
  iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
  iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);

  iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
  iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
  iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
  iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
  iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
  iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);

  iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
  iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
  iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
  iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
  iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
  iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);

  iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);

  iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
  iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
  iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
  iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
  iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
  iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
  iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
  iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
  iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
  iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);

  iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
  iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
  iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
  iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
  iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
  iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
  iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
  iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
  iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);

  iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
  iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
  iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);

  iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
  iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
  iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);

  iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
  iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);

  iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
  iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
  iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
  iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
  iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
  iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);

  iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
  iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
  iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
  iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
  iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
  iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
  iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
  iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
  iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
  iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
  iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
  iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);

  iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
  iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
  iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
  iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);

  iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
  iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
  iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
  iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
  iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
  iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
  iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);

  iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
  iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
  iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);

  iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
  iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
  iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
  iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);

  iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
  iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
  iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
  iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);

  iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
  iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
  iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
  iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);

  iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
  iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
  iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
  iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);

  iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
  iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
  iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
  iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);

  iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
  iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
  iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
  iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);

  iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);

  iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
  iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
  iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);

#undef iwmmx_mbuiltin
#undef iwmmx2_mbuiltin
}

static void
arm_init_tls_builtins (void)
{
  tree ftype;
  tree decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
  arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
}
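
/* Illustrative use (not from the original sources): after this runs,
   TLS-enabled code can obtain the thread pointer directly, e.g.

     void *tp = __builtin_thread_pointer ();

   which expands through ARM_BUILTIN_THREAD_POINTER to arm_load_tp in
   arm_expand_builtin below.  */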

static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
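
/* Illustrative example (not from the original sources): once this type
   is registered, C code compiled with a half-precision format selected
   (e.g. -mfp16-format=ieee) can write

     __fp16 h = 1.0f;

   and h is stored as a 16-bit floating-point value.  */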

static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}

/* Return the ARM builtin for CODE.  */

static tree
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARM_BUILTIN_MAX)
    return error_mark_node;

  return arm_builtin_decls[code];
}

/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}

/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions between
   __fp16 to or from double to do an intermediate conversion to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
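
/* Illustrative example (not from the original sources): for

     double d = ...;
     __fp16 h = (__fp16) d;

   the hook rewrites the conversion as (__fp16)(float) d, so the
   double-to-__fp16 narrowing rounds through float first, as the ARM
   half-precision semantics described above require.  */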

/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}

/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}

/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}

typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5

/* EXP is a pointer argument to a Neon load or store intrinsic.  Derive
   and return an expression for the accessed memory.

   The intrinsic function operates on a block of registers that has
   mode REG_MODE.  This block contains vectors of type TYPE_MODE.
   The function references the memory at EXP in mode MEM_MODE;
   this mode may be BLKmode if no more suitable mode is available.  */

static tree
neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
			  enum machine_mode reg_mode,
			  neon_builtin_type_mode type_mode)
{
  HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
  tree elem_type, upper_bound, array_type;

  /* Work out the size of the register block in bytes.  */
  reg_size = GET_MODE_SIZE (reg_mode);

  /* Work out the size of each vector in bytes.  */
  gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
  vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);

  /* Work out how many vectors there are.  */
  gcc_assert (reg_size % vector_size == 0);
  nvectors = reg_size / vector_size;

  /* Work out how many elements are being loaded or stored.
     MEM_MODE == REG_MODE implies a one-to-one mapping between register
     and memory elements; anything else implies a lane load or store.  */
  if (mem_mode == reg_mode)
    nelems = vector_size * nvectors;
  else
    nelems = nvectors;

  /* Work out the type of each element.  */
  gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
  elem_type = TREE_TYPE (TREE_TYPE (exp));

  /* Create a type that describes the full access.  */
  upper_bound = build_int_cst (size_type_node, nelems - 1);
  array_type = build_array_type (elem_type, build_index_type (upper_bound));

  /* Dereference EXP using that type.  */
  return fold_build2 (MEM_REF, array_type, exp,
		      build_int_cst (build_pointer_type (array_type), 0));
}
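
/* Worked example (illustrative, not from the original sources): for a
   lane access on a block of two D registers, REG_MODE is 16 bytes wide,
   so vector_size == 8 and nvectors == 2; since MEM_MODE differs from
   REG_MODE, nelems == nvectors == 2, and the access is described as an
   array of two elements of the pointed-to type.  */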

/* Expand a Neon builtin.  */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      neon_builtin_type_mode type_mode,
		      tree exp, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode other_mode;
  int argc = 0;
  int opno;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, exp);

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
	  opno = argc + have_retval;
	  mode[argc] = insn_data[icode].operand[opno].mode;
	  arg[argc] = CALL_EXPR_ARG (exp, argc);
	  if (thisarg == NEON_ARG_MEMORY)
	    {
	      other_mode = insn_data[icode].operand[1 - opno].mode;
	      arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
						    other_mode, type_mode);
	    }
	  op[argc] = expand_normal (arg[argc]);

	  switch (thisarg)
	    {
	    case NEON_ARG_COPY_TO_REG:
	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	      if (!(*insn_data[icode].operand[opno].predicate)
		     (op[argc], mode[argc]))
		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	      break;

	    case NEON_ARG_CONSTANT:
	      /* FIXME: This error message is somewhat unhelpful.  */
	      if (!(*insn_data[icode].operand[opno].predicate)
		     (op[argc], mode[argc]))
		error ("argument must be a constant");
	      break;

	    case NEON_ARG_MEMORY:
	      gcc_assert (MEM_P (op[argc]));
	      PUT_MODE (op[argc], mode[argc]);
	      /* ??? arm_neon.h uses the same built-in functions for signed
		 and unsigned accesses, casting where necessary.  This isn't
		 alias safe.  */
	      set_mem_alias_set (op[argc], 0);
	      if (!(*insn_data[icode].operand[opno].predicate)
		     (op[argc], mode[argc]))
		op[argc] = (replace_equiv_address
			    (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
	      break;

	    case NEON_ARG_STOP:
	      gcc_unreachable ();
	    }

	  argc++;
	}
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (target, op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (target, op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}

/* Expand a Neon builtin.  These are "special" because they don't have symbolic
   constants defined per-instruction or per instruction-variant.  Instead, the
   required info is looked up in the table neon_builtin_data.  */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
  neon_itype itype = d->itype;
  enum insn_code icode = d->code;
  neon_builtin_type_mode type_mode = d->mode;

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_MEMORY, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);
    }

  gcc_unreachable ();
}

/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */
void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, op2, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}

/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
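
/* Illustrative example (not from the original sources): decomposing a
   copy of {d1, d2} into {d2, d3}, the destination overlaps the source
   and starts at the higher REGNO, so the second loop above emits the
   component moves in reverse order (d3 <- d2 before d2 <- d1), avoiding
   clobbering d2 while it is still needed as a source.  */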

/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
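
/* Illustrative example (not from the original sources): for a vcombine
   whose destination is the Q register overlaying {d0, d1}, sources d0
   and d1 hit the no-op path above, sources d1 and d0 hit the VSWP
   special case, and any other sources fall through to one or two
   ordinary moves ordered so that no source is overwritten early.  */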

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code    icode;
  tree              fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree              arg0;
  tree              arg1;
  tree              arg2;
  rtx               op0;
  rtx               op1;
  rtx               op2;
  rtx               pat;
  int               fcode = DECL_FUNCTION_CODE (fndecl);
  size_t            i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;
  int opint;
  int selector;
  int mask;
  int imm;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}

      opint = INTVAL (op1);
      if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
	{
	  if (opint > 7 || opint < 0)
	    error ("the selector must be in the range 0 to 7");
	}
      else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
	{
	  if (opint > 3 || opint < 0)
	    error ("the selector must be in the range 0 to 3");
	}
      else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW.  */
	{
	  if (opint > 1 || opint < 0)
	    error ("the selector must be in the range 0 to 1");
	}

      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WALIGNI:
      /* If op2 is immediate, call waligni, else call walignr.  */
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (GET_CODE (op2) == CONST_INT)
	{
	  icode = CODE_FOR_iwmmxt_waligni;
	  tmode = insn_data[icode].operand[0].mode;
	  mode0 = insn_data[icode].operand[1].mode;
	  mode1 = insn_data[icode].operand[2].mode;
	  mode2 = insn_data[icode].operand[3].mode;
	  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
	    op0 = copy_to_mode_reg (mode0, op0);
	  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
	    op1 = copy_to_mode_reg (mode1, op1);
	  gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
	  selector = INTVAL (op2);
	  if (selector > 7 || selector < 0)
	    error ("the selector must be in the range 0 to 7");
	}
      else
	{
	  icode = CODE_FOR_iwmmxt_walignr;
	  tmode = insn_data[icode].operand[0].mode;
	  mode0 = insn_data[icode].operand[1].mode;
	  mode1 = insn_data[icode].operand[2].mode;
	  mode2 = insn_data[icode].operand[3].mode;
	  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
	    op0 = copy_to_mode_reg (mode0, op0);
	  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
	    op1 = copy_to_mode_reg (mode1, op1);
	  if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
	    op2 = copy_to_mode_reg (mode2, op2);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
    case ARM_BUILTIN_WMERGE:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
	       : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
	       : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (icode == CODE_FOR_iwmmxt_wmerge)
	{
	  selector = INTVAL (op2);
	  if (selector > 7 || selector < 0)
	    error ("the selector must be in the range 0 to 7");
	}
      if ((icode == CODE_FOR_iwmmxt_tinsrb)
	  || (icode == CODE_FOR_iwmmxt_tinsrh)
	  || (icode == CODE_FOR_iwmmxt_tinsrw))
	{
	  mask = 0x01;
	  selector = INTVAL (op2);
	  if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
	    error ("the selector must be in the range 0 to 7");
	  else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
	    error ("the selector must be in the range 0 to 3");
	  else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
	    error ("the selector must be in the range 0 to 1");
	  mask <<= selector;
	  op2 = gen_rtx_CONST_INT (SImode, mask);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_SETWCGR0:
    case ARM_BUILTIN_SETWCGR1:
    case ARM_BUILTIN_SETWCGR2:
    case ARM_BUILTIN_SETWCGR3:
      icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
	       : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
	       : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
	       : CODE_FOR_iwmmxt_setwcgr3);
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      pat = GEN_FCN (icode) (op0);
      if (!pat)
	return 0;
      emit_insn (pat);
      return 0;

    case ARM_BUILTIN_GETWCGR0:
    case ARM_BUILTIN_GETWCGR1:
    case ARM_BUILTIN_GETWCGR2:
    case ARM_BUILTIN_GETWCGR3:
      icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
	       : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
	       : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
	       : CODE_FOR_iwmmxt_getwcgr3);
      tmode = insn_data[icode].operand[0].mode;
      if (target == 0
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      selector = INTVAL (op1);
      if (selector < 0 || selector > 255)
	error ("the mask must be in the range 0 to 255");
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WMADDS:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
    case ARM_BUILTIN_WMADDSX:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
    case ARM_BUILTIN_WMADDSN:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
    case ARM_BUILTIN_WMADDU:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
    case ARM_BUILTIN_WMADDUX:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
    case ARM_BUILTIN_WMADDUN:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);

      /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
    case ARM_BUILTIN_WQMIABB:
    case ARM_BUILTIN_WQMIABT:
    case ARM_BUILTIN_WQMIATB:
    case ARM_BUILTIN_WQMIATT:
    case ARM_BUILTIN_WQMIABBN:
    case ARM_BUILTIN_WQMIABTN:
    case ARM_BUILTIN_WQMIATBN:
    case ARM_BUILTIN_WQMIATTN:
    case ARM_BUILTIN_WMIABB:
    case ARM_BUILTIN_WMIABT:
    case ARM_BUILTIN_WMIATB:
    case ARM_BUILTIN_WMIATT:
    case ARM_BUILTIN_WMIABBN:
    case ARM_BUILTIN_WMIABTN:
    case ARM_BUILTIN_WMIATBN:
    case ARM_BUILTIN_WMIATTN:
    case ARM_BUILTIN_WMIAWBB:
    case ARM_BUILTIN_WMIAWBT:
    case ARM_BUILTIN_WMIAWTB:
    case ARM_BUILTIN_WMIAWTT:
    case ARM_BUILTIN_WMIAWBBN:
    case ARM_BUILTIN_WMIAWBTN:
    case ARM_BUILTIN_WMIAWTBN:
    case ARM_BUILTIN_WMIAWTTN:
    case ARM_BUILTIN_WSADB:
    case ARM_BUILTIN_WSADH:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
	       : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
	       : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
	       : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
	       : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
	       : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
	       : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
	       : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
	       : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
	       : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
	       : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
	       : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
	       : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
	       : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
	       : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
	       : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
	       : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
	       : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
	       : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
	       : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
	       : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
	       : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
	       : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
	       : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
	       : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
	       : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
	       : CODE_FOR_iwmmxt_wsadh);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;

    case ARM_BUILTIN_WSRLHI:
    case ARM_BUILTIN_WSRLWI:
    case ARM_BUILTIN_WSRLDI:
    case ARM_BUILTIN_WSLLHI:
    case ARM_BUILTIN_WSLLWI:
    case ARM_BUILTIN_WSLLDI:
    case ARM_BUILTIN_WSRAHI:
    case ARM_BUILTIN_WSRAWI:
    case ARM_BUILTIN_WSRADI:
    case ARM_BUILTIN_WRORHI:
    case ARM_BUILTIN_WRORWI:
    case ARM_BUILTIN_WRORDI:
    case ARM_BUILTIN_WSRLH:
    case ARM_BUILTIN_WSRLW:
    case ARM_BUILTIN_WSRLD:
    case ARM_BUILTIN_WSLLH:
    case ARM_BUILTIN_WSLLW:
    case ARM_BUILTIN_WSLLD:
    case ARM_BUILTIN_WSRAH:
    case ARM_BUILTIN_WSRAW:
    case ARM_BUILTIN_WSRAD:
    case ARM_BUILTIN_WRORH:
    case ARM_BUILTIN_WRORW:
    case ARM_BUILTIN_WRORD:
      icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
	       : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
	       : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
	       : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
	       : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
	       : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
	       : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
	       : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
	       : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
	       : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
	       : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
	       : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
	       : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
	       : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
	       : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
	       : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
	       : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
	       : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
	       : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
	       : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
	       : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
	       : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
	       : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
	       : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
	       : CODE_FOR_nothing);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op1 = expand_normal (arg1);
      if (GET_MODE (op1) == VOIDmode)
	{
	  imm = INTVAL (op1);
	  if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
	       || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
	      && (imm < 0 || imm > 32))
	    {
	      if (fcode == ARM_BUILTIN_WRORHI)
		error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi16 in your code");
	      else if (fcode == ARM_BUILTIN_WRORWI)
		error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi32 in your code");
	      else if (fcode == ARM_BUILTIN_WRORH)
		error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi16 in your code");
	      else
		error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi32 in your code");
	    }
	  else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
		   && (imm < 0 || imm > 64))
	    {
	      if (fcode == ARM_BUILTIN_WRORDI)
		error ("the count must be in the range 0 to 64; please check the intrinsic _mm_rori_si64 in your code");
	      else
		error ("the count must be in the range 0 to 64; please check the intrinsic _mm_ror_si64 in your code");
	    }
	  else if (imm < 0)
	    {
	      if (fcode == ARM_BUILTIN_WSRLHI)
		error ("the count must be non-negative; please check the intrinsic _mm_srli_pi16 in your code");
	      else if (fcode == ARM_BUILTIN_WSRLWI)
		error ("the count must be non-negative; please check the intrinsic _mm_srli_pi32 in your code");
	      else if (fcode == ARM_BUILTIN_WSRLDI)
		error ("the count must be non-negative; please check the intrinsic _mm_srli_si64 in your code");
	      else if (fcode == ARM_BUILTIN_WSLLHI)
		error ("the count must be non-negative; please check the intrinsic _mm_slli_pi16 in your code");
	      else if (fcode == ARM_BUILTIN_WSLLWI)
		error ("the count must be non-negative; please check the intrinsic _mm_slli_pi32 in your code");
	      else if (fcode == ARM_BUILTIN_WSLLDI)
		error ("the count must be non-negative; please check the intrinsic _mm_slli_si64 in your code");
	      else if (fcode == ARM_BUILTIN_WSRAHI)
		error ("the count must be non-negative; please check the intrinsic _mm_srai_pi16 in your code");
	      else if (fcode == ARM_BUILTIN_WSRAWI)
		error ("the count must be non-negative; please check the intrinsic _mm_srai_pi32 in your code");
	      else if (fcode == ARM_BUILTIN_WSRADI)
		error ("the count must be non-negative; please check the intrinsic _mm_srai_si64 in your code");
	      else if (fcode == ARM_BUILTIN_WSRLH)
		error ("the count must be non-negative; please check the intrinsic _mm_srl_pi16 in your code");
	      else if (fcode == ARM_BUILTIN_WSRLW)
		error ("the count must be non-negative; please check the intrinsic _mm_srl_pi32 in your code");
	      else if (fcode == ARM_BUILTIN_WSRLD)
		error ("the count must be non-negative; please check the intrinsic _mm_srl_si64 in your code");
	      else if (fcode == ARM_BUILTIN_WSLLH)
		error ("the count must be non-negative; please check the intrinsic _mm_sll_pi16 in your code");
	      else if (fcode == ARM_BUILTIN_WSLLW)
		error ("the count must be non-negative; please check the intrinsic _mm_sll_pi32 in your code");
	      else if (fcode == ARM_BUILTIN_WSLLD)
		error ("the count must be non-negative; please check the intrinsic _mm_sll_si64 in your code");
	      else if (fcode == ARM_BUILTIN_WSRAH)
		error ("the count must be non-negative; please check the intrinsic _mm_sra_pi16 in your code");
	      else if (fcode == ARM_BUILTIN_WSRAW)
		error ("the count must be non-negative; please check the intrinsic _mm_sra_pi32 in your code");
	      else
		error ("the count must be non-negative; please check the intrinsic _mm_sra_si64 in your code");
	    }
	}
      return arm_expand_binop_builtin (icode, exp, target);

    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}

/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
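
/* For example, number_of_first_bit_set (0x28) is 3, since bit 3 is the
   lowest bit set in 0b101000.  */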

/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg, insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (VOIDmode, tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
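
/* Illustrative example (not from the original sources): for MASK and
   REAL_REGS both covering {r4, r5, lr}, the emitted parallel describes

     push	{r4, r5, lr}

   with a PRE_MODIFY of the stack pointer by -12, and the attached
   REG_FRAME_RELATED_EXPR note records the three saves at sp, sp+4 and
   sp+8 for the unwinder.  */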

/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
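
/* Illustrative example (not from the original sources): for a MASK
   containing r4, r5 and PC in a function needing neither interworking,
   backtrace nor eh_return, this prints

     pop	{r4, r5, pc}

   whereas with TARGET_INTERWORK set it closes the list after r5 and
   lets thumb_exit pop the return address into a low register and BX.  */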
21314 /* Generate code to return from a thumb function.
21315 If 'reg_containing_return_addr' is -1, then the return address is
21316 actually on the stack, at the stack pointer. */
21318 thumb_exit (FILE *f, int reg_containing_return_addr)
21320 unsigned regs_available_for_popping;
21321 unsigned regs_to_pop;
21323 unsigned available;
21327 int restore_a4 = FALSE;
21329 /* Compute the registers we need to pop. */
21333 if (reg_containing_return_addr == -1)
21335 regs_to_pop |= 1 << LR_REGNUM;
21339 if (TARGET_BACKTRACE)
21341 /* Restore the (ARM) frame pointer and stack pointer. */
21342 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21346 /* If there is nothing to pop then just emit the BX instruction and
21348 if (pops_needed == 0)
21350 if (crtl->calls_eh_return)
21351 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21353 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21356 /* Otherwise if we are not supporting interworking and we have not created
21357 a backtrace structure and the function was not entered in ARM mode then
21358 just pop the return address straight into the PC. */
21359 else if (!TARGET_INTERWORK
21360 && !TARGET_BACKTRACE
21361 && !is_called_in_ARM_mode (current_function_decl)
21362 && !crtl->calls_eh_return)
21364 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21368 /* Find out how many of the (return) argument registers we can corrupt. */
21369 regs_available_for_popping = 0;
21371 /* If returning via __builtin_eh_return, the bottom three registers
21372 all contain information needed for the return. */
21373 if (crtl->calls_eh_return)
21377 /* If we can deduce the registers used from the function's
21378 return value. This is more reliable that examining
21379 df_regs_ever_live_p () because that will be set if the register is
21380 ever used in the function, not just if the register is used
21381 to hold a return value. */
21383 if (crtl->return_rtx != 0)
21384 mode = GET_MODE (crtl->return_rtx);
21386 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21388 size = GET_MODE_SIZE (mode);
21392 /* In a void function we can use any argument register.
21393 In a function that returns a structure on the stack
21394 we can use the second and third argument registers. */
21395 if (mode == VOIDmode)
21396 regs_available_for_popping =
21397 (1 << ARG_REGISTER (1))
21398 | (1 << ARG_REGISTER (2))
21399 | (1 << ARG_REGISTER (3));
21401 regs_available_for_popping =
21402 (1 << ARG_REGISTER (2))
21403 | (1 << ARG_REGISTER (3));
21405 else if (size <= 4)
21406 regs_available_for_popping =
21407 (1 << ARG_REGISTER (2))
21408 | (1 << ARG_REGISTER (3));
21409 else if (size <= 8)
21410 regs_available_for_popping =
21411 (1 << ARG_REGISTER (3));
21414 /* Match registers to be popped with registers into which we pop them. */
21415 for (available = regs_available_for_popping,
21416 required = regs_to_pop;
21417 required != 0 && available != 0;
21418 available &= ~(available & - available),
21419 required &= ~(required & - required))
21422 /* If we have any popping registers left over, remove them. */
21424 regs_available_for_popping &= ~available;
21426 /* Otherwise if we need another popping register we can use
21427 the fourth argument register. */
21428 else if (pops_needed)
21430 /* If we have not found any free argument registers and
21431 reg a4 contains the return address, we must move it. */
21432 if (regs_available_for_popping == 0
21433 && reg_containing_return_addr == LAST_ARG_REGNUM)
21435 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21436 reg_containing_return_addr = LR_REGNUM;
21438 else if (size > 12)
21440 /* Register a4 is being used to hold part of the return value,
21441 but we have dire need of a free, low register. */
21444 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
21447 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21449 /* The fourth argument register is available. */
21450 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21456 /* Pop as many registers as we can. */
21457 thumb_pop (f, regs_available_for_popping);
21459 /* Process the registers we popped. */
21460 if (reg_containing_return_addr == -1)
21462 /* The return address was popped into the lowest numbered register. */
21463 regs_to_pop &= ~(1 << LR_REGNUM);
21465 reg_containing_return_addr =
21466 number_of_first_bit_set (regs_available_for_popping);
21468 /* Remove this register for the mask of available registers, so that
21469 the return address will not be corrupted by further pops. */
21470 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21473 /* If we popped other registers then handle them here. */
21474 if (regs_available_for_popping)
21478 /* Work out which register currently contains the frame pointer. */
21479 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21481 /* Move it into the correct place. */
21482 asm_fprintf (f, "\tmov\t%r, %r\n",
21483 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21485 /* (Temporarily) remove it from the mask of popped registers. */
21486 regs_available_for_popping &= ~(1 << frame_pointer);
21487 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21489 if (regs_available_for_popping)
21493 /* We popped the stack pointer as well,
21494 find the register that contains it. */
21495 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21497 /* Move it into the stack register. */
21498 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21500 /* At this point we have popped all necessary registers, so
21501 do not worry about restoring regs_available_for_popping
21502 to its correct value:
21504 assert (pops_needed == 0)
21505 assert (regs_available_for_popping == (1 << frame_pointer))
21506 assert (regs_to_pop == (1 << STACK_POINTER)) */
21510 /* Since we have just move the popped value into the frame
21511 pointer, the popping register is available for reuse, and
21512 we know that we still have the stack pointer left to pop. */
21513 regs_available_for_popping |= (1 << frame_pointer);
21517 /* If we still have registers left on the stack, but we no longer have
21518 any registers into which we can pop them, then we must move the return
21519 address into the link register and make available the register that
21521 if (regs_available_for_popping == 0 && pops_needed > 0)
21523 regs_available_for_popping |= 1 << reg_containing_return_addr;
21525 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21526 reg_containing_return_addr);
21528 reg_containing_return_addr = LR_REGNUM;
21531 /* If we have registers left on the stack then pop some more.
21532 We know that at most we will want to pop FP and SP. */
21533 if (pops_needed > 0)
21538 thumb_pop (f, regs_available_for_popping);
21540 /* We have popped either FP or SP.
21541 Move whichever one it is into the correct register. */
21542 popped_into = number_of_first_bit_set (regs_available_for_popping);
21543 move_to = number_of_first_bit_set (regs_to_pop);
21545 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21547 regs_to_pop &= ~(1 << move_to);
21552 /* If we still have not popped everything then we must have only
21553 had one register available to us and we are now popping the SP. */
21554 if (pops_needed > 0)
21558 thumb_pop (f, regs_available_for_popping);
21560 popped_into = number_of_first_bit_set (regs_available_for_popping);
21562 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21564 /* assert (regs_to_pop == (1 << STACK_POINTER))
21565 assert (pops_needed == 1)  */
21569 /* If necessary restore the a4 register. */
21572 if (reg_containing_return_addr != LR_REGNUM)
21574 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21575 reg_containing_return_addr = LR_REGNUM;
21578 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21581 if (crtl->calls_eh_return)
21582 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21584 /* Return to caller. */
21585 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21588 /* Scan INSN just before assembler is output for it.
21589 For Thumb-1, we track the status of the condition codes; this
21590 information is used in the cbranchsi4_insn pattern. */
21592 thumb1_final_prescan_insn (rtx insn)
21594 if (flag_print_asm_name)
21595 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21596 INSN_ADDRESSES (INSN_UID (insn)));
21597 /* Don't overwrite the previous setter when we get to a cbranch. */
21598 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21600 enum attr_conds conds;
21602 if (cfun->machine->thumb1_cc_insn)
21604 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21605 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21608 conds = get_attr_conds (insn);
21609 if (conds == CONDS_SET)
21611 rtx set = single_set (insn);
21612 cfun->machine->thumb1_cc_insn = insn;
21613 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21614 cfun->machine->thumb1_cc_op1 = const0_rtx;
21615 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21616 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21618 rtx src1 = XEXP (SET_SRC (set), 1);
21619 if (src1 == const0_rtx)
21620 cfun->machine->thumb1_cc_mode = CCmode;
21623 else if (conds != CONDS_NOCOND)
21624 cfun->machine->thumb1_cc_insn = NULL_RTX;
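/* Illustrative note (editorial, not from the original source): after an
   insn such as "adds r0, r1, r2", which has conds == CONDS_SET, the code
   above records r0 compared against 0 in CC_NOOVmode, so a following
   cbranchsi4_insn testing r0 against zero can reuse the flags instead of
   emitting a separate compare.  For "subs r0, r1, #0" the full CCmode is
   recorded, since subtracting zero sets the flags exactly as
   "cmp r1, #0" would.  */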
21629 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21631 unsigned HOST_WIDE_INT mask = 0xff;
21634 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21635 if (val == 0) /* XXX */
21638 for (i = 0; i < 25; i++)
21639 if ((val & (mask << i)) == val)
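/* Worked example (editorial): 0x1FE0 is 0xFF << 5, so the loop above
   accepts it at i == 5; 0x8001 has set bits 16 positions apart, which no
   single 8-bit mask position can cover, so it is rejected.  */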
21645 /* Returns nonzero if the current function contains,
21646 or might contain, a far jump.  */
21648 thumb_far_jump_used_p (void)
21652 /* This test is only important for leaf functions. */
21653 /* assert (!leaf_function_p ()); */
21655 /* If we have already decided that far jumps may be used,
21656 do not bother checking again, and always return true even if
21657 it turns out that they are not being used. Once we have made
21658 the decision that far jumps are present (and that hence the link
21659 register will be pushed onto the stack) we cannot go back on it. */
21660 if (cfun->machine->far_jump_used)
21663 /* If this function is not being called from the prologue/epilogue
21664 generation code then it must be being called from the
21665 INITIAL_ELIMINATION_OFFSET macro. */
21666 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21668 /* In this case we know that we are being asked about the elimination
21669 of the arg pointer register. If that register is not being used,
21670 then there are no arguments on the stack, and we do not have to
21671 worry that a far jump might force the prologue to push the link
21672 register, changing the stack offsets. In this case we can just
21673 return false, since the presence of far jumps in the function will
21674 not affect stack offsets.
21676 If the arg pointer is live (or if it was live, but has now been
21677 eliminated and so set to dead) then we do have to test to see if
21678 the function might contain a far jump. This test can lead to some
21679 false negatives, since before reload is completed, the length of
21680 branch instructions is not known, so gcc defaults to returning their
21681 longest length, which in turn sets the far jump attribute to true.
21683 A false negative will not result in bad code being generated, but it
21684 will result in a needless push and pop of the link register. We
21685 hope that this does not occur too often.
21687 If we need doubleword stack alignment this could affect the other
21688 elimination offsets so we can't risk getting it wrong. */
21689 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21690 cfun->machine->arg_pointer_live = 1;
21691 else if (!cfun->machine->arg_pointer_live)
21695 /* Check to see if the function contains a branch
21696 insn with the far jump attribute set. */
21697 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21699 if (GET_CODE (insn) == JUMP_INSN
21700 /* Ignore tablejump patterns. */
21701 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21702 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21703 && get_attr_far_jump (insn) == FAR_JUMP_YES
21706 /* Record the fact that we have decided that
21707 the function does use far jumps. */
21708 cfun->machine->far_jump_used = 1;
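/* Background note (editorial): a Thumb-1 unconditional branch reaches
   roughly +/-2KB and a conditional branch only about +/-256 bytes, so a
   branch whose target may lie further away carries the far_jump attribute
   and is synthesised with BL, which clobbers LR and therefore forces LR
   to be saved by the prologue.  */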
21716 /* Return nonzero if FUNC must be entered in ARM mode. */
21718 is_called_in_ARM_mode (tree func)
21720 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21722 /* Ignore the problem of functions whose address is taken.  */
21723 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21727 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21733 /* Given the stack offsets and register mask in OFFSETS, decide how
21734 many additional registers to push instead of subtracting a constant
21735 from SP. For epilogues the principle is the same except we use pop.
21736 FOR_PROLOGUE indicates which we're generating. */
21738 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21740 HOST_WIDE_INT amount;
21741 unsigned long live_regs_mask = offsets->saved_regs_mask;
21742 /* Extract a mask of the ones we can give to the Thumb's push/pop instruction.  */
21744 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21745 /* Then count how many other high registers will need to be pushed. */
21746 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21747 int n_free, reg_base;
21749 if (!for_prologue && frame_pointer_needed)
21750 amount = offsets->locals_base - offsets->saved_regs;
21752 amount = offsets->outgoing_args - offsets->saved_regs;
21754 /* If the stack frame size is 512 exactly, we can save one load
21755 instruction, which should make this a win even when optimizing for speed.  */
21757 if (!optimize_size && amount != 512)
21760 /* Can't do this if there are high registers to push. */
21761 if (high_regs_pushed != 0)
21764 /* Shouldn't do it in the prologue if no registers would normally
21765 be pushed at all. In the epilogue, also allow it if we'll have
21766 a pop insn for the PC. */
21769 || TARGET_BACKTRACE
21770 || (live_regs_mask & 1 << LR_REGNUM) == 0
21771 || TARGET_INTERWORK
21772 || crtl->args.pretend_args_size != 0))
21775 /* Don't do this if thumb_expand_prologue wants to emit instructions
21776 between the push and the stack frame allocation. */
21778 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21779 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21786 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21787 live_regs_mask >>= reg_base;
21790 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21791 && (for_prologue || call_used_regs[reg_base + n_free]))
21793 live_regs_mask >>= 1;
21799 gcc_assert (amount / 4 * 4 == amount);
21801 if (amount >= 512 && (amount - n_free * 4) < 512)
21802 return (amount - 508) / 4;
21803 if (amount <= n_free * 4)
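/* Worked example (editorial): with amount == 12 and three free low
   registers, pushing three extra registers replaces "sub sp, #12"
   entirely; with amount == 516 and n_free >= 2, pushing
   (516 - 508) / 4 == 2 extra registers brings the remaining adjustment
   down to 508, back within range of a single Thumb-1 "sub sp" immediate.  */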
21808 /* The bits which aren't usefully expanded as rtl. */
21810 thumb1_unexpanded_epilogue (void)
21812 arm_stack_offsets *offsets;
21814 unsigned long live_regs_mask = 0;
21815 int high_regs_pushed = 0;
21817 int had_to_push_lr;
21820 if (cfun->machine->return_used_this_function != 0)
21823 if (IS_NAKED (arm_current_func_type ()))
21826 offsets = arm_get_frame_offsets ();
21827 live_regs_mask = offsets->saved_regs_mask;
21828 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21830 /* Deduce the registers used from the function's return value.
21831 This is more reliable than examining df_regs_ever_live_p () because that
21832 will be set if the register is ever used in the function, not just if
21833 the register is used to hold a return value. */
21834 size = arm_size_return_regs ();
21836 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21839 unsigned long extra_mask = (1 << extra_pop) - 1;
21840 live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
21844 /* The prologue may have pushed some high registers to use as
21845 work registers. e.g. the testsuite file:
21846 gcc/testsuite/gcc.c-torture/execute/complex-2.c
21847 compiles to produce:
21848 push {r4, r5, r6, r7, lr}
21852 as part of the prologue. We have to undo that pushing here. */
21854 if (high_regs_pushed)
21856 unsigned long mask = live_regs_mask & 0xff;
21859 /* The available low registers depend on the size of the value we are returning.  */
21867 /* Oh dear!  We have no low registers into which we can pop the high registers!  */
21870 internal_error ("no low registers available for popping high registers");
21872 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21873 if (live_regs_mask & (1 << next_hi_reg))
21876 while (high_regs_pushed)
21878 /* Find lo register(s) into which the high register(s) can be popped.  */
21880 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21882 if (mask & (1 << regno))
21883 high_regs_pushed--;
21884 if (high_regs_pushed == 0)
21888 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21890 /* Pop the values into the low register(s). */
21891 thumb_pop (asm_out_file, mask);
21893 /* Move the value(s) into the high registers. */
21894 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21896 if (mask & (1 << regno))
21898 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21901 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21902 if (live_regs_mask & (1 << next_hi_reg))
21907 live_regs_mask &= ~0x0f00;
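/* Illustrative note (editorial): if, say, r8 and r9 were saved through
   low registers in the prologue, the loop above pops the stacked values
   into whatever low registers are free and then transfers them with moves
   such as "mov r8, r4", because Thumb-1 pop cannot target high registers
   directly.  */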
21910 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21911 live_regs_mask &= 0xff;
21913 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21915 /* Pop the return address into the PC. */
21916 if (had_to_push_lr)
21917 live_regs_mask |= 1 << PC_REGNUM;
21919 /* Either no argument registers were pushed or a backtrace
21920 structure was created which includes an adjusted stack
21921 pointer, so just pop everything. */
21922 if (live_regs_mask)
21923 thumb_pop (asm_out_file, live_regs_mask);
21925 /* We have either just popped the return address into the
21926 PC or it was kept in LR for the entire function.
21927 Note that thumb_pop has already called thumb_exit if the
21928 PC was in the list. */
21929 if (!had_to_push_lr)
21930 thumb_exit (asm_out_file, LR_REGNUM);
21934 /* Pop everything but the return address. */
21935 if (live_regs_mask)
21936 thumb_pop (asm_out_file, live_regs_mask);
21938 if (had_to_push_lr)
21942 /* We have no free low regs, so save one. */
21943 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21947 /* Get the return address into a temporary register. */
21948 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21952 /* Move the return address to lr. */
21953 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21955 /* Restore the low register. */
21956 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21961 regno = LAST_ARG_REGNUM;
21966 /* Remove the argument registers that were pushed onto the stack. */
21967 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21968 SP_REGNUM, SP_REGNUM,
21969 crtl->args.pretend_args_size);
21971 thumb_exit (asm_out_file, regno);
21977 /* Functions to save and restore machine-specific function data. */
21978 static struct machine_function *
21979 arm_init_machine_status (void)
21981 struct machine_function *machine;
21982 machine = ggc_alloc_cleared_machine_function ();
21984 #if ARM_FT_UNKNOWN != 0
21985 machine->func_type = ARM_FT_UNKNOWN;
21990 /* Return an RTX indicating where the return address to the
21991 calling function can be found. */
21993 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
21998 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22001 /* Do anything needed before RTL is emitted for each function. */
22003 arm_init_expanders (void)
22005 /* Arrange to initialize and mark the machine per-function status. */
22006 init_machine_status = arm_init_machine_status;
22008 /* This is to stop the combine pass optimizing away the alignment
22009 adjustment of va_arg. */
22010 /* ??? It is claimed that this should not be necessary. */
22012 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22016 /* Like arm_compute_initial_elimination_offset. Simpler because there
22017 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
22018 to point at the base of the local variables after static stack
22019 space for a function has been allocated. */
22022 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22024 arm_stack_offsets *offsets;
22026 offsets = arm_get_frame_offsets ();
22030 case ARG_POINTER_REGNUM:
22033 case STACK_POINTER_REGNUM:
22034 return offsets->outgoing_args - offsets->saved_args;
22036 case FRAME_POINTER_REGNUM:
22037 return offsets->soft_frame - offsets->saved_args;
22039 case ARM_HARD_FRAME_POINTER_REGNUM:
22040 return offsets->saved_regs - offsets->saved_args;
22042 case THUMB_HARD_FRAME_POINTER_REGNUM:
22043 return offsets->locals_base - offsets->saved_args;
22046 gcc_unreachable ();
22050 case FRAME_POINTER_REGNUM:
22053 case STACK_POINTER_REGNUM:
22054 return offsets->outgoing_args - offsets->soft_frame;
22056 case ARM_HARD_FRAME_POINTER_REGNUM:
22057 return offsets->saved_regs - offsets->soft_frame;
22059 case THUMB_HARD_FRAME_POINTER_REGNUM:
22060 return offsets->locals_base - offsets->soft_frame;
22063 gcc_unreachable ();
22068 gcc_unreachable ();
22072 /* Generate the function's prologue. */
22075 thumb1_expand_prologue (void)
22079 HOST_WIDE_INT amount;
22080 arm_stack_offsets *offsets;
22081 unsigned long func_type;
22083 unsigned long live_regs_mask;
22084 unsigned long l_mask;
22085 unsigned high_regs_pushed = 0;
22087 func_type = arm_current_func_type ();
22089 /* Naked functions don't have prologues. */
22090 if (IS_NAKED (func_type))
22093 if (IS_INTERRUPT (func_type))
22095 error ("interrupt Service Routines cannot be coded in Thumb mode");
22099 if (is_called_in_ARM_mode (current_function_decl))
22100 emit_insn (gen_prologue_thumb1_interwork ());
22102 offsets = arm_get_frame_offsets ();
22103 live_regs_mask = offsets->saved_regs_mask;
22105 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22106 l_mask = live_regs_mask & 0x40ff;
22107 /* Then count how many other high registers will need to be pushed. */
22108 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22110 if (crtl->args.pretend_args_size)
22112 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22114 if (cfun->machine->uses_anonymous_args)
22116 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22117 unsigned long mask;
22119 mask = 1ul << (LAST_ARG_REGNUM + 1);
22120 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22122 insn = thumb1_emit_multi_reg_push (mask, 0);
22126 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22127 stack_pointer_rtx, x));
22129 RTX_FRAME_RELATED_P (insn) = 1;
22132 if (TARGET_BACKTRACE)
22134 HOST_WIDE_INT offset = 0;
22135 unsigned work_register;
22136 rtx work_reg, x, arm_hfp_rtx;
22138 /* We have been asked to create a stack backtrace structure.
22139 The code looks like this:
22143 0 sub SP, #16 Reserve space for 4 registers.
22144 2 push {R7} Push low registers.
22145 4 add R7, SP, #20 Get the stack pointer before the push.
22146 6 str R7, [SP, #8] Store the stack pointer
22147 (before reserving the space).
22148 8 mov R7, PC Get hold of the start of this code + 12.
22149 10 str R7, [SP, #16] Store it.
22150 12 mov R7, FP Get hold of the current frame pointer.
22151 14 str R7, [SP, #4] Store it.
22152 16 mov R7, LR Get hold of the current return address.
22153 18 str R7, [SP, #12] Store it.
22154 20 add R7, SP, #16 Point at the start of the
22155 backtrace structure.
22156 22 mov FP, R7 Put this value into the frame pointer. */
22158 work_register = thumb_find_work_register (live_regs_mask);
22159 work_reg = gen_rtx_REG (SImode, work_register);
22160 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22162 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22163 stack_pointer_rtx, GEN_INT (-16)));
22164 RTX_FRAME_RELATED_P (insn) = 1;
22168 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22169 RTX_FRAME_RELATED_P (insn) = 1;
22171 offset = bit_count (l_mask) * UNITS_PER_WORD;
22174 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22175 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22177 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
22178 x = gen_frame_mem (SImode, x);
22179 emit_move_insn (x, work_reg);
22181 /* Make sure that the instruction fetching the PC is in the right place
22182 to calculate "start of backtrace creation code + 12". */
22183 /* ??? The stores using the common WORK_REG ought to be enough to
22184 prevent the scheduler from doing anything weird. Failing that
22185 we could always move all of the following into an UNSPEC_VOLATILE. */
22188 x = gen_rtx_REG (SImode, PC_REGNUM);
22189 emit_move_insn (work_reg, x);
22191 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22192 x = gen_frame_mem (SImode, x);
22193 emit_move_insn (x, work_reg);
22195 emit_move_insn (work_reg, arm_hfp_rtx);
22197 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22198 x = gen_frame_mem (SImode, x);
22199 emit_move_insn (x, work_reg);
22203 emit_move_insn (work_reg, arm_hfp_rtx);
22205 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22206 x = gen_frame_mem (SImode, x);
22207 emit_move_insn (x, work_reg);
22209 x = gen_rtx_REG (SImode, PC_REGNUM);
22210 emit_move_insn (work_reg, x);
22212 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22213 x = gen_frame_mem (SImode, x);
22214 emit_move_insn (x, work_reg);
22217 x = gen_rtx_REG (SImode, LR_REGNUM);
22218 emit_move_insn (work_reg, x);
22220 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
22221 x = gen_frame_mem (SImode, x);
22222 emit_move_insn (x, work_reg);
22224 x = GEN_INT (offset + 12);
22225 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22227 emit_move_insn (arm_hfp_rtx, work_reg);
22229 /* Optimization: If we are not pushing any low registers but we are going
22230 to push some high registers then delay our first push. This will just
22231 be a push of LR and we can combine it with the push of the first high register.  */
22233 else if ((l_mask & 0xff) != 0
22234 || (high_regs_pushed == 0 && l_mask))
22236 unsigned long mask = l_mask;
22237 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22238 insn = thumb1_emit_multi_reg_push (mask, mask);
22239 RTX_FRAME_RELATED_P (insn) = 1;
22242 if (high_regs_pushed)
22244 unsigned pushable_regs;
22245 unsigned next_hi_reg;
22247 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22248 if (live_regs_mask & (1 << next_hi_reg))
22251 pushable_regs = l_mask & 0xff;
22253 if (pushable_regs == 0)
22254 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22256 while (high_regs_pushed > 0)
22258 unsigned long real_regs_mask = 0;
22260 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22262 if (pushable_regs & (1 << regno))
22264 emit_move_insn (gen_rtx_REG (SImode, regno),
22265 gen_rtx_REG (SImode, next_hi_reg));
22267 high_regs_pushed --;
22268 real_regs_mask |= (1 << next_hi_reg);
22270 if (high_regs_pushed)
22272 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM; next_hi_reg --)
22274 if (live_regs_mask & (1 << next_hi_reg))
22279 pushable_regs &= ~((1 << regno) - 1);
22285 /* If we had to find a work register and we have not yet
22286 saved the LR then add it to the list of regs to push. */
22287 if (l_mask == (1 << LR_REGNUM))
22289 pushable_regs |= l_mask;
22290 real_regs_mask |= l_mask;
22294 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22295 RTX_FRAME_RELATED_P (insn) = 1;
22299 /* Load the pic register before setting the frame pointer,
22300 so we can use r7 as a temporary work register. */
22301 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22302 arm_load_pic_register (live_regs_mask);
22304 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22305 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22306 stack_pointer_rtx);
22308 if (flag_stack_usage_info)
22309 current_function_static_stack_size
22310 = offsets->outgoing_args - offsets->saved_args;
22312 amount = offsets->outgoing_args - offsets->saved_regs;
22313 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22318 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22319 GEN_INT (- amount)));
22320 RTX_FRAME_RELATED_P (insn) = 1;
22326 /* The stack decrement is too big for an immediate value in a single
22327 insn. In theory we could issue multiple subtracts, but after
22328 three of them it becomes more space efficient to place the full
22329 value in the constant pool and load into a register. (Also the
22330 ARM debugger really likes to see only one stack decrement per
22331 function). So instead we look for a scratch register into which
22332 we can load the decrement, and then we subtract this from the
22333 stack pointer. Unfortunately, on Thumb the only available
22334 scratch registers are the argument registers, and we cannot use
22335 these as they may hold arguments to the function. Instead we
22336 attempt to locate a call preserved register which is used by this
22337 function. If we can find one, then we know that it will have
22338 been pushed at the start of the prologue and so we can corrupt it here.  */
22340 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22341 if (live_regs_mask & (1 << regno))
22344 gcc_assert (regno <= LAST_LO_REGNUM);
22346 reg = gen_rtx_REG (SImode, regno);
22348 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22350 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22351 stack_pointer_rtx, reg));
22353 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22354 plus_constant (Pmode, stack_pointer_rtx,
22356 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22357 RTX_FRAME_RELATED_P (insn) = 1;
22361 if (frame_pointer_needed)
22362 thumb_set_frame_pointer (offsets);
22364 /* If we are profiling, make sure no instructions are scheduled before
22365 the call to mcount. Similarly if the user has requested no
22366 scheduling in the prologue. Similarly if we want non-call exceptions
22367 using the EABI unwinder, to prevent faulting instructions from being
22368 swapped with a stack adjustment. */
22369 if (crtl->profile || !TARGET_SCHED_PROLOG
22370 || (arm_except_unwind_info (&global_options) == UI_TARGET
22371 && cfun->can_throw_non_call_exceptions))
22372 emit_insn (gen_blockage ());
22374 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22375 if (live_regs_mask & 0xff)
22376 cfun->machine->lr_save_eliminated = 0;
22379 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
22380 POP instruction can be generated. LR should be replaced by PC. All
22381 the checks required are already done by USE_RETURN_INSN (). Hence,
22382 all we really need to check here is whether the return pops a single
22383 register or multiple registers.  */
22385 thumb2_expand_return (void)
22388 unsigned long saved_regs_mask;
22389 arm_stack_offsets *offsets;
22391 offsets = arm_get_frame_offsets ();
22392 saved_regs_mask = offsets->saved_regs_mask;
22394 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
22395 if (saved_regs_mask & (1 << i))
22398 if (saved_regs_mask)
22402 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22403 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
22404 rtx addr = gen_rtx_MEM (SImode,
22405 gen_rtx_POST_INC (SImode,
22406 stack_pointer_rtx));
22407 set_mem_alias_set (addr, get_frame_alias_set ());
22408 XVECEXP (par, 0, 0) = ret_rtx;
22409 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
22410 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
22411 emit_jump_insn (par);
22415 saved_regs_mask &= ~ (1 << LR_REGNUM);
22416 saved_regs_mask |= (1 << PC_REGNUM);
22417 arm_emit_multi_reg_pop (saved_regs_mask);
22422 emit_jump_insn (simple_return_rtx);
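/* Illustrative note (editorial): a function whose prologue emitted
   "push {r4, r5, lr}" returns through a single "pop {r4, r5, pc}",
   obtained by swapping LR for PC in saved_regs_mask above; when only one
   register was saved, the PARALLEL built above instead loads the PC
   directly with a post-incremented SP, the equivalent of
   "ldr pc, [sp], #4".  */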
22427 thumb1_expand_epilogue (void)
22429 HOST_WIDE_INT amount;
22430 arm_stack_offsets *offsets;
22433 /* Naked functions don't have epilogues.  */
22434 if (IS_NAKED (arm_current_func_type ()))
22437 offsets = arm_get_frame_offsets ();
22438 amount = offsets->outgoing_args - offsets->saved_regs;
22440 if (frame_pointer_needed)
22442 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22443 amount = offsets->locals_base - offsets->saved_regs;
22445 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22447 gcc_assert (amount >= 0);
22450 emit_insn (gen_blockage ());
22453 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22454 GEN_INT (amount)));
22457 /* r3 is always free in the epilogue. */
22458 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22460 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22461 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22465 /* Emit a USE (stack_pointer_rtx), so that
22466 the stack adjustment will not be deleted. */
22467 emit_insn (gen_prologue_use (stack_pointer_rtx));
22469 if (crtl->profile || !TARGET_SCHED_PROLOG)
22470 emit_insn (gen_blockage ());
22472 /* Emit a clobber for each register that will be restored in the epilogue,
22473 so that flow2 will get register lifetimes correct. */
22474 for (regno = 0; regno < 13; regno++)
22475 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22476 emit_clobber (gen_rtx_REG (SImode, regno));
22478 if (! df_regs_ever_live_p (LR_REGNUM))
22479 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22482 /* Epilogue code for APCS frame. */
22484 arm_expand_epilogue_apcs_frame (bool really_return)
22486 unsigned long func_type;
22487 unsigned long saved_regs_mask;
22490 int floats_from_frame = 0;
22491 arm_stack_offsets *offsets;
22493 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
22494 func_type = arm_current_func_type ();
22496 /* Get frame offsets for ARM. */
22497 offsets = arm_get_frame_offsets ();
22498 saved_regs_mask = offsets->saved_regs_mask;
22500 /* Find the offset of the floating-point save area in the frame. */
22501 floats_from_frame = offsets->saved_args - offsets->frame;
22503 /* Compute how many core registers are saved and how far away the floats are. */
22504 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22505 if (saved_regs_mask & (1 << i))
22508 floats_from_frame += 4;
22511 if (TARGET_HARD_FLOAT && TARGET_VFP)
22515 /* The offset is from IP_REGNUM. */
22516 int saved_size = arm_get_vfp_saved_size ();
22517 if (saved_size > 0)
22519 floats_from_frame += saved_size;
22520 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
22521 hard_frame_pointer_rtx,
22522 GEN_INT (-floats_from_frame)));
22525 /* Generate VFP register multi-pop. */
22526 start_reg = FIRST_VFP_REGNUM;
22528 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
22529 /* Look for a case where a reg does not need restoring. */
22530 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22531 && (!df_regs_ever_live_p (i + 1)
22532 || call_used_regs[i + 1]))
22534 if (start_reg != i)
22535 arm_emit_vfp_multi_reg_pop (start_reg,
22536 (i - start_reg) / 2,
22537 gen_rtx_REG (SImode,
22542 /* Restore the remaining regs that we have discovered (or possibly
22543 even all of them, if the conditional in the for loop never fired).  */
22545 if (start_reg != i)
22546 arm_emit_vfp_multi_reg_pop (start_reg,
22547 (i - start_reg) / 2,
22548 gen_rtx_REG (SImode, IP_REGNUM));
22553 /* The frame pointer is guaranteed not to be double-word aligned, since
22554 it is set to (double-word-aligned old_stack_pointer) - 4.  */
22556 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
22558 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
22559 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22561 rtx addr = gen_frame_mem (V2SImode,
22562 plus_constant (Pmode, hard_frame_pointer_rtx,
22564 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22565 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22566 gen_rtx_REG (V2SImode, i),
22572 /* saved_regs_mask should contain IP, which holds the old stack pointer
22573 saved at the time the activation record was created. Since SP and IP are adjacent registers,
22574 we can restore the value directly into SP. */
22575 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
22576 saved_regs_mask &= ~(1 << IP_REGNUM);
22577 saved_regs_mask |= (1 << SP_REGNUM);
22579 /* There are two registers left in saved_regs_mask - LR and PC. We
22580 only need to restore LR (the return address), but to
22581 save time we can load it directly into PC, unless we need a
22582 special function exit sequence, or we are not really returning. */
22584 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
22585 && !crtl->calls_eh_return)
22586 /* Delete LR from the register mask, so that LR on
22587 the stack is loaded into the PC in the register mask. */
22588 saved_regs_mask &= ~(1 << LR_REGNUM);
22590 saved_regs_mask &= ~(1 << PC_REGNUM);
22592 num_regs = bit_count (saved_regs_mask);
22593 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
22595 /* Unwind the stack to just below the saved registers. */
22596 emit_insn (gen_addsi3 (stack_pointer_rtx,
22597 hard_frame_pointer_rtx,
22598 GEN_INT (- 4 * num_regs)));
22601 arm_emit_multi_reg_pop (saved_regs_mask);
22603 if (IS_INTERRUPT (func_type))
22605 /* Interrupt handlers will have pushed the
22606 IP onto the stack, so restore it now. */
22608 rtx addr = gen_rtx_MEM (SImode,
22609 gen_rtx_POST_INC (SImode,
22610 stack_pointer_rtx));
22611 set_mem_alias_set (addr, get_frame_alias_set ());
22612 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
22613 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22614 gen_rtx_REG (SImode, IP_REGNUM),
22618 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
22621 if (crtl->calls_eh_return)
22622 emit_insn (gen_addsi3 (stack_pointer_rtx,
22624 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
22626 if (IS_STACKALIGN (func_type))
22627 /* Restore the original stack pointer. Before the prologue, the stack was
22628 realigned and the original stack pointer saved in r0. For details,
22629 see comment in arm_expand_prologue. */
22630 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22632 emit_jump_insn (simple_return_rtx);
22635 /* Generate RTL to represent ARM epilogue. Really_return is true if the
22636 function is not a sibcall. */
22638 arm_expand_epilogue (bool really_return)
22640 unsigned long func_type;
22641 unsigned long saved_regs_mask;
22645 int floats_from_frame = 0;
22646 arm_stack_offsets *offsets;
22648 func_type = arm_current_func_type ();
22650 /* Naked functions don't have epilogues. Hence, generate a return pattern and
22651 let output_return_instruction take care of any instruction emission.  */
22652 if (IS_NAKED (func_type)
22653 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
22655 emit_jump_insn (simple_return_rtx);
22659 /* If we are throwing an exception, then we really must be doing a
22660 return, so we can't tail-call. */
22661 gcc_assert (!crtl->calls_eh_return || really_return);
22663 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22665 arm_expand_epilogue_apcs_frame (really_return);
22669 /* Get frame offsets for ARM. */
22670 offsets = arm_get_frame_offsets ();
22671 saved_regs_mask = offsets->saved_regs_mask;
22673 /* Find the offset of the floating-point registers from the frame pointer.
22674 The initialization is done this way to take care of the frame pointer
22675 and the static-chain register, if stored.  */
22676 floats_from_frame = offsets->saved_args - offsets->frame;
22677 /* Compute how many registers are saved and how far away the floats will be. */
22678 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22679 if (saved_regs_mask & (1 << i))
22682 floats_from_frame += 4;
22685 if (frame_pointer_needed)
22687 /* Restore stack pointer if necessary. */
22690 /* In ARM mode, the frame pointer points to the first saved register.
22691 Restore the stack pointer to the last saved register.  */
22692 amount = offsets->frame - offsets->saved_regs;
22694 /* Force out any pending memory operations that reference stacked data
22695 before stack de-allocation occurs. */
22696 emit_insn (gen_blockage ());
22697 emit_insn (gen_addsi3 (stack_pointer_rtx,
22698 hard_frame_pointer_rtx,
22699 GEN_INT (amount)));
22701 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted.  */
22703 emit_insn (gen_prologue_use (stack_pointer_rtx));
22707 /* In Thumb-2 mode, the frame pointer points to the last saved register.  */
22709 amount = offsets->locals_base - offsets->saved_regs;
22711 emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22712 hard_frame_pointer_rtx,
22713 GEN_INT (amount)));
22715 /* Force out any pending memory operations that reference stacked data
22716 before stack de-allocation occurs. */
22717 emit_insn (gen_blockage ());
22718 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22719 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted.  */
22721 emit_insn (gen_prologue_use (stack_pointer_rtx));
22726 /* Pop off outgoing args and local frame to adjust stack pointer to
22727 last saved register. */
22728 amount = offsets->outgoing_args - offsets->saved_regs;
22731 /* Force out any pending memory operations that reference stacked data
22732 before stack de-allocation occurs. */
22733 emit_insn (gen_blockage ());
22734 emit_insn (gen_addsi3 (stack_pointer_rtx,
22736 GEN_INT (amount)));
22737 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted.  */
22739 emit_insn (gen_prologue_use (stack_pointer_rtx));
22743 if (TARGET_HARD_FLOAT && TARGET_VFP)
22745 /* Generate VFP register multi-pop. */
22746 int end_reg = LAST_VFP_REGNUM + 1;
22748 /* Scan the registers in reverse order. We need to match
22749 any groupings made in the prologue and generate matching
22750 vldm operations. The need to match groups is because,
22751 unlike pop, vldm can only do consecutive regs. */
22752 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
22753 /* Look for a case where a reg does not need restoring. */
22754 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22755 && (!df_regs_ever_live_p (i + 1)
22756 || call_used_regs[i + 1]))
22758 /* Restore the regs discovered so far (from reg+2 to end_reg).  */
22760 if (end_reg > i + 2)
22761 arm_emit_vfp_multi_reg_pop (i + 2,
22762 (end_reg - (i + 2)) / 2,
22763 stack_pointer_rtx);
22767 /* Restore the remaining regs that we have discovered (or possibly
22768 even all of them, if the conditional in the for loop never fired).  */
22770 if (end_reg > i + 2)
22771 arm_emit_vfp_multi_reg_pop (i + 2,
22772 (end_reg - (i + 2)) / 2,
22773 stack_pointer_rtx);
22777 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
22778 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22781 rtx addr = gen_rtx_MEM (V2SImode,
22782 gen_rtx_POST_INC (SImode,
22783 stack_pointer_rtx));
22784 set_mem_alias_set (addr, get_frame_alias_set ());
22785 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22786 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22787 gen_rtx_REG (V2SImode, i),
22791 if (saved_regs_mask)
22794 bool return_in_pc = false;
22796 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
22797 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
22798 && !IS_STACKALIGN (func_type)
22800 && crtl->args.pretend_args_size == 0
22801 && saved_regs_mask & (1 << LR_REGNUM)
22802 && !crtl->calls_eh_return)
22804 saved_regs_mask &= ~(1 << LR_REGNUM);
22805 saved_regs_mask |= (1 << PC_REGNUM);
22806 return_in_pc = true;
22809 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
22811 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22812 if (saved_regs_mask & (1 << i))
22814 rtx addr = gen_rtx_MEM (SImode,
22815 gen_rtx_POST_INC (SImode,
22816 stack_pointer_rtx));
22817 set_mem_alias_set (addr, get_frame_alias_set ());
22819 if (i == PC_REGNUM)
22821 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22822 XVECEXP (insn, 0, 0) = ret_rtx;
22823 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
22824 gen_rtx_REG (SImode, i),
22826 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
22827 insn = emit_jump_insn (insn);
22831 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
22833 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22834 gen_rtx_REG (SImode, i),
22841 arm_emit_multi_reg_pop (saved_regs_mask);
22844 if (return_in_pc)
22848 if (crtl->args.pretend_args_size)
22849 emit_insn (gen_addsi3 (stack_pointer_rtx,
22851 GEN_INT (crtl->args.pretend_args_size)));
22853 if (!really_return)
22856 if (crtl->calls_eh_return)
22857 emit_insn (gen_addsi3 (stack_pointer_rtx,
22859 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
22861 if (IS_STACKALIGN (func_type))
22862 /* Restore the original stack pointer. Before the prologue, the stack was
22863 realigned and the original stack pointer saved in r0. For details,
22864 see comment in arm_expand_prologue. */
22865 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22867 emit_jump_insn (simple_return_rtx);
22870 /* Implementation of insn prologue_thumb1_interwork. This is the first
22871 "instruction" of a function called in ARM mode. Swap to thumb mode. */
22874 thumb1_output_interwork (void)
22877 FILE *f = asm_out_file;
22879 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22880 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22882 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22884 /* Generate code sequence to switch us into Thumb mode. */
22885 /* The .code 32 directive has already been emitted by
22886 ASM_DECLARE_FUNCTION_NAME. */
22887 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22888 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22890 /* Generate a label, so that the debugger will notice the
22891 change in instruction sets. This label is also used by
22892 the assembler to bypass the ARM code when this function
22893 is called from a Thumb encoded function elsewhere in the
22894 same file. Hence the definition of STUB_NAME here must
22895 agree with the definition in gas/config/tc-arm.c. */
22897 #define STUB_NAME ".real_start_of"
22899 fprintf (f, "\t.code\t16\n");
22901 if (arm_dllexport_name_p (name))
22902 name = arm_strip_name_encoding (name);
22904 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22905 fprintf (f, "\t.thumb_func\n");
22906 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
22911 /* Handle the case of a double word load into a low register from
22912 a computed memory address. The computed address may involve a
22913 register which is overwritten by the load. */
22915 thumb_load_double_from_address (rtx *operands)
22923 gcc_assert (GET_CODE (operands[0]) == REG);
22924 gcc_assert (GET_CODE (operands[1]) == MEM);
22926 /* Get the memory address. */
22927 addr = XEXP (operands[1], 0);
22929 /* Work out how the memory address is computed. */
22930 switch (GET_CODE (addr))
22933 operands[2] = adjust_address (operands[1], SImode, 4);
22935 if (REGNO (operands[0]) == REGNO (addr))
22937 output_asm_insn ("ldr\t%H0, %2", operands);
22938 output_asm_insn ("ldr\t%0, %1", operands);
22942 output_asm_insn ("ldr\t%0, %1", operands);
22943 output_asm_insn ("ldr\t%H0, %2", operands);
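/* Illustrative note (editorial): loading a DImode value into r2/r3 from
   [r2] must fetch the high word first, "ldr r3, [r2, #4]" then
   "ldr r2, [r2]", because loading r2 first would destroy the base address
   needed by the second load.  */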
22948 /* Compute <address> + 4 for the high order load. */
22949 operands[2] = adjust_address (operands[1], SImode, 4);
22951 output_asm_insn ("ldr\t%0, %1", operands);
22952 output_asm_insn ("ldr\t%H0, %2", operands);
22956 arg1 = XEXP (addr, 0);
22957 arg2 = XEXP (addr, 1);
22959 if (CONSTANT_P (arg1))
22960 base = arg2, offset = arg1;
22962 base = arg1, offset = arg2;
22964 gcc_assert (GET_CODE (base) == REG);
22966 /* Catch the case of <address> = <reg> + <reg> */
22967 if (GET_CODE (offset) == REG)
22969 int reg_offset = REGNO (offset);
22970 int reg_base = REGNO (base);
22971 int reg_dest = REGNO (operands[0]);
22973 /* Add the base and offset registers together into the
22974 higher destination register. */
22975 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
22976 reg_dest + 1, reg_base, reg_offset);
22978 /* Load the lower destination register from the address in
22979 the higher destination register. */
22980 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
22981 reg_dest, reg_dest + 1);
22983 /* Load the higher destination register from its own address
22985 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
22986 reg_dest + 1, reg_dest + 1);
22990 /* Compute <address> + 4 for the high order load. */
22991 operands[2] = adjust_address (operands[1], SImode, 4);
22993 /* If the computed address is held in the low order register
22994 then load the high order register first, otherwise always
22995 load the low order register first. */
22996 if (REGNO (operands[0]) == REGNO (base))
22998 output_asm_insn ("ldr\t%H0, %2", operands);
22999 output_asm_insn ("ldr\t%0, %1", operands);
23003 output_asm_insn ("ldr\t%0, %1", operands);
23004 output_asm_insn ("ldr\t%H0, %2", operands);
23010 /* With no registers to worry about we can just load the value directly.  */
23012 operands[2] = adjust_address (operands[1], SImode, 4);
23014 output_asm_insn ("ldr\t%H0, %2", operands);
23015 output_asm_insn ("ldr\t%0, %1", operands);
23019 gcc_unreachable ();
23026 thumb_output_move_mem_multiple (int n, rtx *operands)
23033 if (REGNO (operands[4]) > REGNO (operands[5]))
23036 operands[4] = operands[5];
23039 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
23040 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
23044 if (REGNO (operands[4]) > REGNO (operands[5]))
23047 operands[4] = operands[5];
23050 if (REGNO (operands[5]) > REGNO (operands[6]))
23053 operands[5] = operands[6];
23056 if (REGNO (operands[4]) > REGNO (operands[5]))
23059 operands[4] = operands[5];
23063 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
23064 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
23068 gcc_unreachable ();
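/* Editorial note: the swaps above keep each scratch-register pair in
   ascending order, e.g. scratch registers r5 and r4 become
   "ldmia r1!, {r4, r5}", since LDM/STM transfer registers in
   register-number order no matter how the list is written.  */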
23074 /* Output a call-via instruction for thumb state. */
23076 thumb_call_via_reg (rtx reg)
23078 int regno = REGNO (reg);
23081 gcc_assert (regno < LR_REGNUM);
23083 /* If we are in the normal text section we can use a single instance
23084 per compilation unit. If we are doing function sections, then we need
23085 an entry per section, since we can't rely on reachability. */
23086 if (in_section == text_section)
23088 thumb_call_reg_needed = 1;
23090 if (thumb_call_via_label[regno] == NULL)
23091 thumb_call_via_label[regno] = gen_label_rtx ();
23092 labelp = thumb_call_via_label + regno;
23096 if (cfun->machine->call_via[regno] == NULL)
23097 cfun->machine->call_via[regno] = gen_label_rtx ();
23098 labelp = cfun->machine->call_via + regno;
23101 output_asm_insn ("bl\t%a0", labelp);
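/* Illustrative note (editorial): on cores without a register-form BLX, a
   call through r4 is emitted as "bl .Ln", where the shared stub
   ".Ln: bx r4" is written out later, by arm_file_end for the text section
   or per-section through cfun->machine->call_via.  */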
23105 /* Routines for generating rtl. */
23107 thumb_expand_movmemqi (rtx *operands)
23109 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
23110 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
23111 HOST_WIDE_INT len = INTVAL (operands[2]);
23112 HOST_WIDE_INT offset = 0;
23116 emit_insn (gen_movmem12b (out, in, out, in));
23122 emit_insn (gen_movmem8b (out, in, out, in));
23128 rtx reg = gen_reg_rtx (SImode);
23129 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
23130 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
23137 rtx reg = gen_reg_rtx (HImode);
23138 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
23139 plus_constant (Pmode, in,
23141 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
23150 rtx reg = gen_reg_rtx (QImode);
23151 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
23152 plus_constant (Pmode, in,
23154 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
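/* Worked example (editorial): a 23-byte copy emits one 12-byte block
   move, one 8-byte block move, then a halfword and a byte copy for the
   remainder (23 = 12 + 8 + 2 + 1).  */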
23161 thumb_reload_out_hi (rtx *operands)
23163 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
23166 /* Handle reading a half-word from memory during reload. */
23168 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
23170 gcc_unreachable ();
23173 /* Return the length of a function name prefix
23174 that starts with the character 'c'. */
23176 arm_get_strip_length (int c)
23180 ARM_NAME_ENCODING_LENGTHS
23185 /* Return a pointer to a function's name with any
23186 and all prefix encodings stripped from it. */
23188 arm_strip_name_encoding (const char *name)
23192 while ((skip = arm_get_strip_length (* name)))
23198 /* If there is a '*' anywhere in the name's prefix, then
23199 emit the stripped name verbatim, otherwise prepend an
23200 underscore if leading underscores are being used. */
23202 arm_asm_output_labelref (FILE *stream, const char *name)
23207 while ((skip = arm_get_strip_length (* name)))
23209 verbatim |= (*name == '*');
23214 fputs (name, stream);
23216 asm_fprintf (stream, "%U%s", name);
23219 /* This function is used to emit an EABI tag and its associated value.
23220 We emit the numerical value of the tag in case the assembler does not
23221 support textual tags. (E.g. gas prior to 2.20.) If requested, we include
23222 the tag name in a comment so that anyone reading the assembler output
23223 will know which tag is being set.
23225 This function is not static because arm-c.c needs it too. */
23228 arm_emit_eabi_attribute (const char *name, int num, int val)
23230 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
23231 if (flag_verbose_asm || flag_debug_asm)
23232 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
23233 asm_fprintf (asm_out_file, "\n");
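/* Illustrative note (editorial): with -fverbose-asm,
   arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1) writes
   ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal", which assemblers that do
   not know the tag names still accept, since only the number is
   significant.  */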
23237 arm_file_start (void)
23241 if (TARGET_UNIFIED_ASM)
23242 asm_fprintf (asm_out_file, "\t.syntax unified\n");
23246 const char *fpu_name;
23247 if (arm_selected_arch)
23248 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
23249 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
23250 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
23252 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
23254 if (TARGET_SOFT_FLOAT)
23256 fpu_name = "softvfp";
23260 fpu_name = arm_fpu_desc->name;
23261 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
23263 if (TARGET_HARD_FLOAT)
23264 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
23265 if (TARGET_HARD_FLOAT_ABI)
23266 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
23269 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
23271 /* Some of these attributes only apply when the corresponding features
23272 are used. However we don't have any easy way of figuring this out.
23273 Conservatively record the setting that would have been used. */
23275 if (flag_rounding_math)
23276 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
23278 if (!flag_unsafe_math_optimizations)
23280 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
23281 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
23283 if (flag_signaling_nans)
23284 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
23286 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
23287 flag_finite_math_only ? 1 : 3);
23289 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
23290 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
23291 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
23292 flag_short_enums ? 1 : 2);
23294 /* Tag_ABI_optimization_goals. */
23297 else if (optimize >= 2)
23303 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
23305 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
23308 if (arm_fp16_format)
23309 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
23310 (int) arm_fp16_format);
23312 if (arm_lang_output_object_attributes_hook)
23313 arm_lang_output_object_attributes_hook();
23316 default_file_start ();
23320 arm_file_end (void)
23324 if (NEED_INDICATE_EXEC_STACK)
23325 /* Add .note.GNU-stack. */
23326 file_end_indicate_exec_stack ();
23328 if (! thumb_call_reg_needed)
23331 switch_to_section (text_section);
23332 asm_fprintf (asm_out_file, "\t.code 16\n");
23333 ASM_OUTPUT_ALIGN (asm_out_file, 1);
23335 for (regno = 0; regno < LR_REGNUM; regno++)
23337 rtx label = thumb_call_via_label[regno];
23341 targetm.asm_out.internal_label (asm_out_file, "L",
23342 CODE_LABEL_NUMBER (label));
23343 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
23349 /* Symbols in the text segment can be accessed without indirecting via the
23350 constant pool; it may take an extra binary operation, but this is still
23351 faster than indirecting via memory. Don't do this when not optimizing,
23352 since we won't be calculating all of the offsets necessary to do this simplification.  */
23356 arm_encode_section_info (tree decl, rtx rtl, int first)
23358 if (optimize > 0 && TREE_CONSTANT (decl))
23359 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
23361 default_encode_section_info (decl, rtl, first);
23363 #endif /* !ARM_PE */
23366 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
23368 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
23369 && !strcmp (prefix, "L"))
23371 arm_ccfsm_state = 0;
23372 arm_target_insn = NULL;
23374 default_internal_label (stream, prefix, labelno);
23377 /* Output code to add DELTA to the first argument, and then jump
23378 to FUNCTION. Used for C++ multiple inheritance. */
23380 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
23381 HOST_WIDE_INT delta,
23382 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
23385 static int thunk_label = 0;
23388 int mi_delta = delta;
23389 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
23391 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
23394 mi_delta = - mi_delta;
23398 int labelno = thunk_label++;
23399 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
23400 /* Thunks are entered in ARM mode when available. */
23401 if (TARGET_THUMB1_ONLY)
23403 /* push r3 so we can use it as a temporary. */
23404 /* TODO: Omit this save if r3 is not used. */
23405 fputs ("\tpush {r3}\n", file);
23406 fputs ("\tldr\tr3, ", file);
23410 fputs ("\tldr\tr12, ", file);
23412 assemble_name (file, label);
23413 fputc ('\n', file);
23416 /* If we are generating PIC, the ldr instruction below loads
23417 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23418 the address of the add + 8, so we have:
23420 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8) = target + 1.
23423 Note that we have "+ 1" because some versions of GNU ld
23424 don't set the low bit of the result for R_ARM_REL32
23425 relocations against thumb function symbols.
23426 On ARMv6M this is +4, not +8. */
23427 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23428 assemble_name (file, labelpc);
23429 fputs (":\n", file);
23430 if (TARGET_THUMB1_ONLY)
23432 /* This is 2 insns after the start of the thunk, so we know it
23433 is 4-byte aligned. */
23434 fputs ("\tadd\tr3, pc, r3\n", file);
23435 fputs ("\tmov r12, r3\n", file);
23438 fputs ("\tadd\tr12, pc, r12\n", file);
23440 else if (TARGET_THUMB1_ONLY)
23441 fputs ("\tmov r12, r3\n", file);
23443 if (TARGET_THUMB1_ONLY)
23445 if (mi_delta > 255)
23447 fputs ("\tldr\tr3, ", file);
23448 assemble_name (file, label);
23449 fputs ("+4\n", file);
23450 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23451 mi_op, this_regno, this_regno);
23453 else if (mi_delta != 0)
23455 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23456 mi_op, this_regno, this_regno,
23462 /* TODO: Use movw/movt for large constants when available. */
23463 while (mi_delta != 0)
23465 if ((mi_delta & (3 << shift)) == 0)
23469 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23470 mi_op, this_regno, this_regno,
23471 mi_delta & (0xff << shift));
23472 mi_delta &= ~(0xff << shift);
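/* Worked example (editorial): mi_delta == 0x1234 is peeled off by the
   loop above as "add ..., #564" (0x234) followed by "add ..., #4096"
   (0x1000): 8-bit chunks at even bit positions, matching the immediate
   encoding of ARM data-processing instructions.  */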
23479 if (TARGET_THUMB1_ONLY)
23480 fputs ("\tpop\t{r3}\n", file);
23482 fprintf (file, "\tbx\tr12\n");
23483 ASM_OUTPUT_ALIGN (file, 2);
23484 assemble_name (file, label);
23485 fputs (":\n", file);
23488 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23489 rtx tem = XEXP (DECL_RTL (function), 0);
23490 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23491 tem = gen_rtx_MINUS (GET_MODE (tem),
23493 gen_rtx_SYMBOL_REF (Pmode,
23494 ggc_strdup (labelpc)));
23495 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23498 /* Output ".word .LTHUNKn". */
23499 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23501 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23502 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23506 fputs ("\tb\t", file);
23507 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23508 if (NEED_PLT_RELOC)
23509 fputs ("(PLT)", file);
23510 fputc ('\n', file);
23515 arm_emit_vector_const (FILE *file, rtx x)
23518 const char * pattern;
23520 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23522 switch (GET_MODE (x))
23524 case V2SImode: pattern = "%08x"; break;
23525 case V4HImode: pattern = "%04x"; break;
23526 case V8QImode: pattern = "%02x"; break;
23527 default: gcc_unreachable ();
23530 fprintf (file, "0x");
23531 for (i = CONST_VECTOR_NUNITS (x); i--;)
23535 element = CONST_VECTOR_ELT (x, i);
23536 fprintf (file, pattern, INTVAL (element));
23542 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
23543 HFmode constant pool entries are actually loaded with ldr. */
23545 arm_emit_fp16_const (rtx c)
23550 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23551 bits = real_to_target (NULL, &r, HFmode);
23552 if (WORDS_BIG_ENDIAN)
23553 assemble_zeros (2);
23554 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23555 if (!WORDS_BIG_ENDIAN)
23556 assemble_zeros (2);
23560 arm_output_load_gr (rtx *operands)
23567 if (GET_CODE (operands [1]) != MEM
23568 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23569 || GET_CODE (reg = XEXP (sum, 0)) != REG
23570 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
23571 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23572 return "wldrw%?\t%0, %1";
23574 /* Fix up an out-of-range load of a GR register. */
23575 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23576 wcgr = operands[0];
23578 output_asm_insn ("ldr%?\t%0, %1", operands);
23580 operands[0] = wcgr;
23582 output_asm_insn ("tmcr%?\t%0, %1", operands);
23583 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23588 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23590 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23591 named arg and all anonymous args onto the stack.
23592 XXX I know the prologue shouldn't be pushing registers, but it is faster that way.  */
23596 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23597 enum machine_mode mode,
23600 int second_time ATTRIBUTE_UNUSED)
23602 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23605 cfun->machine->uses_anonymous_args = 1;
23606 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23608 nregs = pcum->aapcs_ncrn;
23609 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23613 nregs = pcum->nregs;
23615 if (nregs < NUM_ARG_REGS)
23616 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
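/* Worked example (editorial): for "int f (int a, ...)" one core register
   holds the named argument, so nregs == 1 and *pretend_size becomes
   (4 - 1) * 4 == 12, making the prologue push r1-r3, where any anonymous
   arguments may have arrived.  */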
23619 /* Return nonzero if the CONSUMER instruction (a store) does not need
23620 PRODUCER's value to calculate the address. */
23623 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23625 rtx value = PATTERN (producer);
23626 rtx addr = PATTERN (consumer);
23628 if (GET_CODE (value) == COND_EXEC)
23629 value = COND_EXEC_CODE (value);
23630 if (GET_CODE (value) == PARALLEL)
23631 value = XVECEXP (value, 0, 0);
23632 value = XEXP (value, 0);
23633 if (GET_CODE (addr) == COND_EXEC)
23634 addr = COND_EXEC_CODE (addr);
23635 if (GET_CODE (addr) == PARALLEL)
23636 addr = XVECEXP (addr, 0, 0);
23637 addr = XEXP (addr, 0);
23639 return !reg_overlap_mentioned_p (value, addr);
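/* Illustrative note (editorial): if PRODUCER sets r1 and CONSUMER is
   "str r1, [r2, #4]", only the stored value depends on r1, so this
   returns nonzero and the address generation can be treated as
   independent; for "str r0, [r1, r3]" it returns zero, because r1 feeds
   the address.  */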
23642 /* Return nonzero if the CONSUMER instruction (a store) does need
23643 PRODUCER's value to calculate the address. */
23646 arm_early_store_addr_dep (rtx producer, rtx consumer)
23648 return !arm_no_early_store_addr_dep (producer, consumer);
23651 /* Return nonzero if the CONSUMER instruction (a load) does need
23652 PRODUCER's value to calculate the address. */
23655 arm_early_load_addr_dep (rtx producer, rtx consumer)
23657 rtx value = PATTERN (producer);
23658 rtx addr = PATTERN (consumer);
23660 if (GET_CODE (value) == COND_EXEC)
23661 value = COND_EXEC_CODE (value);
23662 if (GET_CODE (value) == PARALLEL)
23663 value = XVECEXP (value, 0, 0);
23664 value = XEXP (value, 0);
23665 if (GET_CODE (addr) == COND_EXEC)
23666 addr = COND_EXEC_CODE (addr);
23667 if (GET_CODE (addr) == PARALLEL)
23669 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
23670 addr = XVECEXP (addr, 0, 1);
23672 addr = XVECEXP (addr, 0, 0);
23674 addr = XEXP (addr, 1);
23676 return reg_overlap_mentioned_p (value, addr);
23679 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23680 have an early register shift value or amount dependency on the
23681 result of PRODUCER. */
23684 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23686 rtx value = PATTERN (producer);
23687 rtx op = PATTERN (consumer);
23690 if (GET_CODE (value) == COND_EXEC)
23691 value = COND_EXEC_CODE (value);
23692 if (GET_CODE (value) == PARALLEL)
23693 value = XVECEXP (value, 0, 0);
23694 value = XEXP (value, 0);
23695 if (GET_CODE (op) == COND_EXEC)
23696 op = COND_EXEC_CODE (op);
23697 if (GET_CODE (op) == PARALLEL)
23698 op = XVECEXP (op, 0, 0);
23701 early_op = XEXP (op, 0);
23702 /* This is either an actual independent shift, or a shift applied to
23703 the first operand of another operation. We want the whole shift operation. */
23705 if (GET_CODE (early_op) == REG)
23708 return !reg_overlap_mentioned_p (value, early_op);
23711 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23712 have an early register shift value dependency on the result of PRODUCER. */
23716 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23718 rtx value = PATTERN (producer);
23719 rtx op = PATTERN (consumer);
23722 if (GET_CODE (value) == COND_EXEC)
23723 value = COND_EXEC_CODE (value);
23724 if (GET_CODE (value) == PARALLEL)
23725 value = XVECEXP (value, 0, 0);
23726 value = XEXP (value, 0);
23727 if (GET_CODE (op) == COND_EXEC)
23728 op = COND_EXEC_CODE (op);
23729 if (GET_CODE (op) == PARALLEL)
23730 op = XVECEXP (op, 0, 0);
23733 early_op = XEXP (op, 0);
23735 /* This is either an actual independent shift, or a shift applied to
23736 the first operand of another operation. We want the value being
23737 shifted, in either case. */
23738 if (GET_CODE (early_op) != REG)
23739 early_op = XEXP (early_op, 0);
23741 return !reg_overlap_mentioned_p (value, early_op);
23744 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23745 have an early register mult dependency on the result of PRODUCER. */
23749 arm_no_early_mul_dep (rtx producer, rtx consumer)
23751 rtx value = PATTERN (producer);
23752 rtx op = PATTERN (consumer);
23754 if (GET_CODE (value) == COND_EXEC)
23755 value = COND_EXEC_CODE (value);
23756 if (GET_CODE (value) == PARALLEL)
23757 value = XVECEXP (value, 0, 0);
23758 value = XEXP (value, 0);
23759 if (GET_CODE (op) == COND_EXEC)
23760 op = COND_EXEC_CODE (op);
23761 if (GET_CODE (op) == PARALLEL)
23762 op = XVECEXP (op, 0, 0);
23765 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23767 if (GET_CODE (XEXP (op, 0)) == MULT)
23768 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23770 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23776 /* We can't rely on the caller doing the proper promotion when
23777 using APCS or ATPCS. */
23780 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23782 return !TARGET_AAPCS_BASED;
23785 static enum machine_mode
23786 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23787 enum machine_mode mode,
23788 int *punsignedp ATTRIBUTE_UNUSED,
23789 const_tree fntype ATTRIBUTE_UNUSED,
23790 int for_return ATTRIBUTE_UNUSED)
23792 if (GET_MODE_CLASS (mode) == MODE_INT
23793 && GET_MODE_SIZE (mode) < 4)
23799 /* AAPCS based ABIs use short enums by default. */
23802 arm_default_short_enums (void)
23804 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23808 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23811 arm_align_anon_bitfield (void)
23813 return TARGET_AAPCS_BASED;
23817 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23820 arm_cxx_guard_type (void)
23822 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23825 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23826 has an accumulator dependency on the result of the producer (a
23827 multiplication instruction) and no other dependency on that result. */
23829 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23831 rtx mul = PATTERN (producer);
23832 rtx mac = PATTERN (consumer);
23834 rtx mac_op0, mac_op1, mac_acc;
23836 if (GET_CODE (mul) == COND_EXEC)
23837 mul = COND_EXEC_CODE (mul);
23838 if (GET_CODE (mac) == COND_EXEC)
23839 mac = COND_EXEC_CODE (mac);
23841 /* Check that mul is of the form (set (...) (mult ...))
23842 and mla is of the form (set (...) (plus (mult ...) (...))). */
23843 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23844 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23845 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23848 mul_result = XEXP (mul, 0);
23849 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23850 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23851 mac_acc = XEXP (XEXP (mac, 1), 1);
23853 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23854 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23855 && !reg_overlap_mentioned_p (mul_result, mac_op1));
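/* For instance, with
	mul	r2, r0, r1
	mla	r5, r3, r4, r2
   the mul result (r2) feeds only the accumulator operand of the mla, so
   this returns nonzero; if the mla instead used r2 as one of its multiply
   operands, it would return zero.  */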
23859 /* The EABI says test the least significant bit of a guard variable. */
23862 arm_cxx_guard_mask_bit (void)
23864 return TARGET_AAPCS_BASED;
23868 /* The EABI specifies that all array cookies are 8 bytes long. */
23871 arm_get_cookie_size (tree type)
23875 if (!TARGET_AAPCS_BASED)
23876 return default_cxx_get_cookie_size (type);
23878 size = build_int_cst (sizetype, 8);
23883 /* The EABI says that array cookies should also contain the element size. */
23886 arm_cookie_has_size (void)
23888 return TARGET_AAPCS_BASED;
23892 /* The EABI says constructors and destructors should return a pointer to
23893 the object constructed/destroyed. */
23896 arm_cxx_cdtor_returns_this (void)
23898 return TARGET_AAPCS_BASED;
23901 /* The EABI says that an inline function may never be the key method. */
23905 arm_cxx_key_method_may_be_inline (void)
23907 return !TARGET_AAPCS_BASED;
23911 arm_cxx_determine_class_data_visibility (tree decl)
23913 if (!TARGET_AAPCS_BASED
23914 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23917 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23918 is exported. However, on systems without dynamic vague linkage,
23919 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
23920 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23921 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23923 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23924 DECL_VISIBILITY_SPECIFIED (decl) = 1;
23928 arm_cxx_class_data_always_comdat (void)
23930 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23931 vague linkage if the class has no key function. */
23932 return !TARGET_AAPCS_BASED;
23936 /* The EABI says __aeabi_atexit should be used to register static destructors. */
23940 arm_cxx_use_aeabi_atexit (void)
23942 return TARGET_AAPCS_BASED;
23947 arm_set_return_address (rtx source, rtx scratch)
23949 arm_stack_offsets *offsets;
23950 HOST_WIDE_INT delta;
23952 unsigned long saved_regs;
23954 offsets = arm_get_frame_offsets ();
23955 saved_regs = offsets->saved_regs_mask;
23957 if ((saved_regs & (1 << LR_REGNUM)) == 0)
23958 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23961 if (frame_pointer_needed)
23962 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
23965 /* LR will be the first saved register. */
23966 delta = offsets->outgoing_args - (offsets->frame + 4);
23971 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
23972 GEN_INT (delta & ~4095)));
23977 addr = stack_pointer_rtx;
23979 addr = plus_constant (Pmode, addr, delta);
23981 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23987 thumb_set_return_address (rtx source, rtx scratch)
23989 arm_stack_offsets *offsets;
23990 HOST_WIDE_INT delta;
23991 HOST_WIDE_INT limit;
23994 unsigned long mask;
23998 offsets = arm_get_frame_offsets ();
23999 mask = offsets->saved_regs_mask;
24000 if (mask & (1 << LR_REGNUM))
24003 /* Find the saved regs. */
24004 if (frame_pointer_needed)
24006 delta = offsets->soft_frame - offsets->saved_args;
24007 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
24013 delta = offsets->outgoing_args - offsets->saved_args;
24016 /* Allow for the stack frame. */
24017 if (TARGET_THUMB1 && TARGET_BACKTRACE)
24019 /* The link register is always the first saved register. */
24022 /* Construct the address. */
24023 addr = gen_rtx_REG (SImode, reg);
24026 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
24027 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
24031 addr = plus_constant (Pmode, addr, delta);
24033 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24036 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24039 /* Implements target hook vector_mode_supported_p. */
24041 arm_vector_mode_supported_p (enum machine_mode mode)
24043 /* Neon also supports V2SImode, etc. listed in the clause below. */
24044 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
24045 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
24048 if ((TARGET_NEON || TARGET_IWMMXT)
24049 && ((mode == V2SImode)
24050 || (mode == V4HImode)
24051 || (mode == V8QImode)))
24054 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
24055 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
24056 || mode == V2HAmode))
24062 /* Implements target hook array_mode_supported_p. */
24065 arm_array_mode_supported_p (enum machine_mode mode,
24066 unsigned HOST_WIDE_INT nelems)
24069 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
24070 && (nelems >= 2 && nelems <= 4))
24076 /* Use the option -mvectorize-with-neon-double to override the use of quadword
24077 registers when autovectorizing for Neon, at least until multiple vector
24078 widths are supported properly by the middle-end. */
24080 static enum machine_mode
24081 arm_preferred_simd_mode (enum machine_mode mode)
24087 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
24089 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
24091 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
24093 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
24095 if (!TARGET_NEON_VECTORIZE_DOUBLE)
24102 if (TARGET_REALLY_IWMMXT)
24118 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
24120 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
24121 using r0-r4 for function arguments, r7 for the stack frame and don't have
24122 enough left over to do doubleword arithmetic. For Thumb-2 all the
24123 potentially problematic instructions accept high registers so this is not
24124 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
24125 that require many low registers. */
24127 arm_class_likely_spilled_p (reg_class_t rclass)
24129 if ((TARGET_THUMB1 && rclass == LO_REGS)
24130 || rclass == CC_REG)
24136 /* Implements target hook small_register_classes_for_mode_p. */
24138 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
24140 return TARGET_THUMB1;
24143 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
24144 ARM insns and therefore guarantee that the shift count is modulo 256.
24145 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
24146 guarantee no particular behavior for out-of-range counts. */
24148 static unsigned HOST_WIDE_INT
24149 arm_shift_truncation_mask (enum machine_mode mode)
24151 return mode == SImode ? 255 : 0;
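/* E.g. for SImode this lets the compiler fold "x << (n & 255)" to
   "x << n", because the hardware itself only uses the bottom eight bits
   of the shift amount; for DImode no such folding is permitted.  */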
24155 /* Map internal gcc register numbers to DWARF2 register numbers. */
24158 arm_dbx_register_number (unsigned int regno)
24163 if (IS_VFP_REGNUM (regno))
24165 /* See comment in arm_dwarf_register_span. */
24166 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24167 return 64 + regno - FIRST_VFP_REGNUM;
24169 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
24172 if (IS_IWMMXT_GR_REGNUM (regno))
24173 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
24175 if (IS_IWMMXT_REGNUM (regno))
24176 return 112 + regno - FIRST_IWMMXT_REGNUM;
24178 gcc_unreachable ();
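/* By way of example, under this numbering s0 maps to DWARF register 64,
   s31 to 95, and d16 (the first VFPv3-only register) to 256 + 16 = 272.  */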
24181 /* Dwarf models VFPv3 registers as 32 64-bit registers.
24182 GCC models them as 64 32-bit registers, so we need to describe this to
24183 the DWARF generation code. Other registers can use the default. */
24185 arm_dwarf_register_span (rtx rtl)
24192 regno = REGNO (rtl);
24193 if (!IS_VFP_REGNUM (regno))
24196 /* XXX FIXME: The EABI defines two VFP register ranges:
24197 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
256-287: D0-D31
24199 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
24200 corresponding D register. Until GDB supports this, we shall use the
24201 legacy encodings. We also use these encodings for D0-D15 for
24202 compatibility with older debuggers. */
24203 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24206 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
24207 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
24208 regno = (regno - FIRST_VFP_REGNUM) / 2;
24209 for (i = 0; i < nregs; i++)
24210 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
24215 #if ARM_UNWIND_INFO
24216 /* Emit unwind directives for a store-multiple instruction or stack pointer
24217 push during alignment.
24218 These should only ever be generated by the function prologue code, so
24219 expect them to have a particular form. */
24222 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
24225 HOST_WIDE_INT offset;
24226 HOST_WIDE_INT nregs;
24232 e = XVECEXP (p, 0, 0);
24233 if (GET_CODE (e) != SET)
24236 /* First insn will adjust the stack pointer. */
24237 if (GET_CODE (e) != SET
24238 || GET_CODE (XEXP (e, 0)) != REG
24239 || REGNO (XEXP (e, 0)) != SP_REGNUM
24240 || GET_CODE (XEXP (e, 1)) != PLUS)
24243 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
24244 nregs = XVECLEN (p, 0) - 1;
24246 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
24249 /* The function prologue may also push pc, but does not annotate it, as pc is
24250 never restored. We turn this into a stack pointer adjustment. */
24251 if (nregs * 4 == offset - 4)
24253 fprintf (asm_out_file, "\t.pad #4\n");
24257 fprintf (asm_out_file, "\t.save {");
24259 else if (IS_VFP_REGNUM (reg))
24262 fprintf (asm_out_file, "\t.vsave {");
24265 /* Unknown register type. */
24268 /* If the stack increment doesn't match the size of the saved registers,
24269 something has gone horribly wrong. */
24270 if (offset != nregs * reg_size)
24275 /* The remaining insns will describe the stores. */
24276 for (i = 1; i <= nregs; i++)
24278 /* Expect (set (mem <addr>) (reg)).
24279 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
24280 e = XVECEXP (p, 0, i);
24281 if (GET_CODE (e) != SET
24282 || GET_CODE (XEXP (e, 0)) != MEM
24283 || GET_CODE (XEXP (e, 1)) != REG)
24286 reg = REGNO (XEXP (e, 1));
24291 fprintf (asm_out_file, ", ");
24292 /* We can't use %r for vfp because we need to use the
24293 double precision register names. */
24294 if (IS_VFP_REGNUM (reg))
24295 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
24297 asm_fprintf (asm_out_file, "%r", reg);
24299 #ifdef ENABLE_CHECKING
24300 /* Check that the addresses are consecutive. */
24301 e = XEXP (XEXP (e, 0), 0);
24302 if (GET_CODE (e) == PLUS)
24304 offset += reg_size;
24305 if (GET_CODE (XEXP (e, 0)) != REG
24306 || REGNO (XEXP (e, 0)) != SP_REGNUM
24307 || GET_CODE (XEXP (e, 1)) != CONST_INT
24308 || offset != INTVAL (XEXP (e, 1)))
24312 || GET_CODE (e) != REG
24313 || REGNO (e) != SP_REGNUM)
24317 fprintf (asm_out_file, "}\n");
24320 /* Emit unwind directives for a SET. */
24323 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
24331 switch (GET_CODE (e0))
24334 /* Pushing a single register. */
24335 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
24336 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
24337 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
24340 asm_fprintf (asm_out_file, "\t.save ");
24341 if (IS_VFP_REGNUM (REGNO (e1)))
24342 asm_fprintf (asm_out_file, "{d%d}\n",
24343 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
24345 asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
24349 if (REGNO (e0) == SP_REGNUM)
24351 /* A stack increment. */
24352 if (GET_CODE (e1) != PLUS
24353 || GET_CODE (XEXP (e1, 0)) != REG
24354 || REGNO (XEXP (e1, 0)) != SP_REGNUM
24355 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
24358 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
24359 -INTVAL (XEXP (e1, 1)));
24361 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
24363 HOST_WIDE_INT offset;
24365 if (GET_CODE (e1) == PLUS)
24367 if (GET_CODE (XEXP (e1, 0)) != REG
24368 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
24370 reg = REGNO (XEXP (e1, 0));
24371 offset = INTVAL (XEXP (e1, 1));
24372 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
24373 HARD_FRAME_POINTER_REGNUM, reg,
24376 else if (GET_CODE (e1) == REG)
24379 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
24380 HARD_FRAME_POINTER_REGNUM, reg);
24385 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
24387 /* Move from sp to reg. */
24388 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
24390 else if (GET_CODE (e1) == PLUS
24391 && GET_CODE (XEXP (e1, 0)) == REG
24392 && REGNO (XEXP (e1, 0)) == SP_REGNUM
24393 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
24395 /* Set reg to offset from sp. */
24396 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
24397 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
24409 /* Emit unwind directives for the given insn. */
24412 arm_unwind_emit (FILE * asm_out_file, rtx insn)
24415 bool handled_one = false;
24417 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24420 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24421 && (TREE_NOTHROW (current_function_decl)
24422 || crtl->all_throwers_are_sibcalls))
24425 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24428 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24430 pat = XEXP (note, 0);
24431 switch (REG_NOTE_KIND (note))
24433 case REG_FRAME_RELATED_EXPR:
24436 case REG_CFA_REGISTER:
24439 pat = PATTERN (insn);
24440 if (GET_CODE (pat) == PARALLEL)
24441 pat = XVECEXP (pat, 0, 0);
24444 /* Only emitted for IS_STACKALIGN re-alignment. */
24449 src = SET_SRC (pat);
24450 dest = SET_DEST (pat);
24452 gcc_assert (src == stack_pointer_rtx);
24453 reg = REGNO (dest);
24454 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24457 handled_one = true;
24460 case REG_CFA_DEF_CFA:
24461 case REG_CFA_EXPRESSION:
24462 case REG_CFA_ADJUST_CFA:
24463 case REG_CFA_OFFSET:
24464 /* ??? Only handling here what we actually emit. */
24465 gcc_unreachable ();
24473 pat = PATTERN (insn);
24476 switch (GET_CODE (pat))
24479 arm_unwind_emit_set (asm_out_file, pat);
24483 /* Store multiple. */
24484 arm_unwind_emit_sequence (asm_out_file, pat);
24493 /* Output a reference from a function exception table to the type_info
24494 object X. The EABI specifies that the symbol should be relocated by
24495 an R_ARM_TARGET2 relocation. */
24498 arm_output_ttype (rtx x)
24500 fputs ("\t.word\t", asm_out_file);
24501 output_addr_const (asm_out_file, x);
24502 /* Use special relocations for symbol references. */
24503 if (GET_CODE (x) != CONST_INT)
24504 fputs ("(TARGET2)", asm_out_file);
24505 fputc ('\n', asm_out_file);
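/* A sketch of the output for a catch clause on "int", whose typeinfo
   symbol is _ZTIi:
	.word	_ZTIi(TARGET2)
   where the (TARGET2) decoration asks the assembler for the
   R_ARM_TARGET2 relocation required by the EABI.  */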
24510 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24513 arm_asm_emit_except_personality (rtx personality)
24515 fputs ("\t.personality\t", asm_out_file);
24516 output_addr_const (asm_out_file, personality);
24517 fputc ('\n', asm_out_file);
24520 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24523 arm_asm_init_sections (void)
24525 exception_section = get_unnamed_section (0, output_section_asm_op,
24528 #endif /* ARM_UNWIND_INFO */
24530 /* Output unwind directives for the start/end of a function. */
24533 arm_output_fn_unwind (FILE * f, bool prologue)
24535 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24539 fputs ("\t.fnstart\n", f);
24542 /* If this function will never be unwound, then mark it as such.
24543 The same condition is used in arm_unwind_emit to suppress
24544 the frame annotations. */
24545 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24546 && (TREE_NOTHROW (current_function_decl)
24547 || crtl->all_throwers_are_sibcalls))
24548 fputs ("\t.cantunwind\n", f);
24550 fputs ("\t.fnend\n", f);
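/* Taken together, a function that can never be unwound is bracketed
   roughly as:
	.fnstart
	... function body ...
	.cantunwind
	.fnend
   with .cantunwind emitted immediately before the closing .fnend.  */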
24555 arm_emit_tls_decoration (FILE *fp, rtx x)
24557 enum tls_reloc reloc;
24560 val = XVECEXP (x, 0, 0);
24561 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24563 output_addr_const (fp, val);
24568 fputs ("(tlsgd)", fp);
24571 fputs ("(tlsldm)", fp);
24574 fputs ("(tlsldo)", fp);
24577 fputs ("(gottpoff)", fp);
24580 fputs ("(tpoff)", fp);
24583 fputs ("(tlsdesc)", fp);
24586 gcc_unreachable ();
24595 fputs (" + (. - ", fp);
24596 output_addr_const (fp, XVECEXP (x, 0, 2));
24597 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
24598 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24599 output_addr_const (fp, XVECEXP (x, 0, 3));
24609 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24612 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24614 gcc_assert (size == 4);
24615 fputs ("\t.word\t", file);
24616 output_addr_const (file, x);
24617 fputs ("(tlsldo)", file);
24620 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24623 arm_output_addr_const_extra (FILE *fp, rtx x)
24625 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24626 return arm_emit_tls_decoration (fp, x);
24627 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24630 int labelno = INTVAL (XVECEXP (x, 0, 0));
24632 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24633 assemble_name_raw (fp, label);
24637 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24639 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24643 output_addr_const (fp, XVECEXP (x, 0, 0));
24647 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24649 output_addr_const (fp, XVECEXP (x, 0, 0));
24653 output_addr_const (fp, XVECEXP (x, 0, 1));
24657 else if (GET_CODE (x) == CONST_VECTOR)
24658 return arm_emit_vector_const (fp, x);
24663 /* Output assembly for a shift instruction.
24664 SET_FLAGS determines how the instruction modifies the condition codes.
24665 0 - Do not set condition codes.
24666 1 - Set condition codes.
24667 2 - Use smallest instruction. */
24669 arm_output_shift (rtx *operands, int set_flags)
24672 static const char flag_chars[3] = {'?', '.', '!'};
24677 c = flag_chars[set_flags];
24678 if (TARGET_UNIFIED_ASM)
24680 shift = shift_op(operands[3], &val);
24684 operands[2] = GEN_INT(val);
24685 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24688 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24691 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24692 output_asm_insn (pattern, operands);
24696 /* Output assembly for a WMMX immediate shift instruction. */
24698 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
24700 int shift = INTVAL (operands[2]);
24702 enum machine_mode opmode = GET_MODE (operands[0]);
24704 gcc_assert (shift >= 0);
24706 /* Handle out-of-range shift values: greater than 63 (for the D qualifier),
24707 31 (for the W qualifier) or 15 (for the H qualifier). */
24708 if (((opmode == V4HImode) && (shift > 15))
24709 || ((opmode == V2SImode) && (shift > 31))
24710 || ((opmode == DImode) && (shift > 63)))
24714 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24715 output_asm_insn (templ, operands);
24716 if (opmode == DImode)
24718 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
24719 output_asm_insn (templ, operands);
24724 /* The destination register will contain all zeros. */
24725 sprintf (templ, "wzero\t%%0");
24726 output_asm_insn (templ, operands);
24731 if ((opmode == DImode) && (shift > 32))
24733 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24734 output_asm_insn (templ, operands);
24735 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
24736 output_asm_insn (templ, operands);
24740 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
24741 output_asm_insn (templ, operands);
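/* As a worked example (assuming the caller passes the mnemonic "wsrad"),
   a DImode shift by 40 is in range but exceeds the 32 reachable by one
   immediate, so it is split into two instructions:
	wsrad	wR0, wR1, #32
	wsrad	wR0, wR0, #8  */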
24746 /* Output assembly for a WMMX tinsr instruction. */
24748 arm_output_iwmmxt_tinsr (rtx *operands)
24750 int mask = INTVAL (operands[3]);
24753 int units = mode_nunits[GET_MODE (operands[0])];
24754 gcc_assert ((mask & (mask - 1)) == 0);
24755 for (i = 0; i < units; ++i)
24757 if ((mask & 0x01) == 1)
24763 gcc_assert (i < units);
24765 switch (GET_MODE (operands[0]))
24768 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
24771 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
24774 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
24777 gcc_unreachable ();
24780 output_asm_insn (templ, operands);
24785 /* Output a Thumb-1 casesi dispatch sequence. */
24787 thumb1_output_casesi (rtx *operands)
24789 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24791 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24793 switch (GET_MODE(diff_vec))
24796 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24797 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24799 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24800 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24802 return "bl\t%___gnu_thumb1_case_si";
24804 gcc_unreachable ();
24808 /* Output a Thumb-2 casesi instruction. */
24810 thumb2_output_casesi (rtx *operands)
24812 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24814 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24816 output_asm_insn ("cmp\t%0, %1", operands);
24817 output_asm_insn ("bhi\t%l3", operands);
24818 switch (GET_MODE(diff_vec))
24821 return "tbb\t[%|pc, %0]";
24823 return "tbh\t[%|pc, %0, lsl #1]";
24827 output_asm_insn ("adr\t%4, %l2", operands);
24828 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24829 output_asm_insn ("add\t%4, %4, %5", operands);
24834 output_asm_insn ("adr\t%4, %l2", operands);
24835 return "ldr\t%|pc, [%4, %0, lsl #2]";
24838 gcc_unreachable ();
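/* For a QImode (byte-offset) dispatch table, e.g. with the index in r0
   and the bound in r1, the emitted sequence is simply:
	cmp	r0, r1
	bhi	.L3
	tbb	[pc, r0]
   with the byte table following immediately after the tbb.  */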
24842 /* Most ARM cores are single issue, but some newer ones can dual issue.
24843 The scheduler descriptions rely on this being correct. */
24845 arm_issue_rate (void)
24867 /* A table and a function to perform ARM-specific name mangling for
24868 NEON vector types in order to conform to the AAPCS (see "Procedure
24869 Call Standard for the ARM Architecture", Appendix A). To qualify
24870 for emission with the mangled names defined in that document, a
24871 vector type must not only be of the correct mode but also be
24872 composed of NEON vector element types (e.g. __builtin_neon_qi). */
24875 enum machine_mode mode;
24876 const char *element_type_name;
24877 const char *aapcs_name;
24878 } arm_mangle_map_entry;
24880 static arm_mangle_map_entry arm_mangle_map[] = {
24881 /* 64-bit containerized types. */
24882 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
24883 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
24884 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
24885 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
24886 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
24887 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
24888 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
24889 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
24890 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
24891 /* 128-bit containerized types. */
24892 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
24893 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
24894 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
24895 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
24896 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
24897 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
24898 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
24899 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
24900 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
24901 { VOIDmode, NULL, NULL }
24905 arm_mangle_type (const_tree type)
24907 arm_mangle_map_entry *pos = arm_mangle_map;
24909 /* The ARM ABI documents (10th October 2008) say that "__va_list"
24910 has to be mangled as if it is in the "std" namespace. */
24911 if (TARGET_AAPCS_BASED
24912 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
24914 static bool warned;
24915 if (!warned && warn_psabi && !in_system_header)
24918 inform (input_location,
24919 "the mangling of %<va_list%> has changed in GCC 4.4");
24921 return "St9__va_list";
24924 /* Half-precision float. */
24925 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
24928 if (TREE_CODE (type) != VECTOR_TYPE)
24931 /* Check the mode of the vector type, and the name of the vector
24932 element type, against the table. */
24933 while (pos->mode != VOIDmode)
24935 tree elt_type = TREE_TYPE (type);
24937 if (pos->mode == TYPE_MODE (type)
24938 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
24939 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
24940 pos->element_type_name))
24941 return pos->aapcs_name;
24946 /* Use the default mangling for unrecognized (possibly user-defined)
24951 /* Order of allocation of core registers for Thumb: this allocation is
24952 written over the corresponding initial entries of the array
24953 initialized with REG_ALLOC_ORDER. We allocate all low registers
24954 first. Saving and restoring a low register is usually cheaper than
24955 using a call-clobbered high register. */
24957 static const int thumb_core_reg_alloc_order[] =
24959 3, 2, 1, 0, 4, 5, 6, 7,
24960 14, 12, 8, 9, 10, 11
24963 /* Adjust register allocation order when compiling for Thumb. */
24966 arm_order_regs_for_local_alloc (void)
24968 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
24969 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
24971 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
24972 sizeof (thumb_core_reg_alloc_order));
24975 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
24978 arm_frame_pointer_required (void)
24980 return (cfun->has_nonlocal_label
24981 || SUBTARGET_FRAME_POINTER_REQUIRED
24982 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
24985 /* Only thumb1 can't support conditional execution, so return true if
24986 the target is not thumb1. */
24988 arm_have_conditional_execution (void)
24990 return !TARGET_THUMB1;
24993 static unsigned int
24994 arm_autovectorize_vector_sizes (void)
24996 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
25000 arm_vector_alignment_reachable (const_tree type, bool is_packed)
25002 /* Vectors which aren't in packed structures will not be less aligned than
25003 the natural alignment of their element type, so this is safe. */
25004 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25007 return default_builtin_vector_alignment_reachable (type, is_packed);
25011 arm_builtin_support_vector_misalignment (enum machine_mode mode,
25012 const_tree type, int misalignment,
25015 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25017 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
25022 /* If the misalignment is unknown, we should be able to handle the access
25023 so long as it is not to a member of a packed data structure. */
25024 if (misalignment == -1)
25027 /* Return true if the misalignment is a multiple of the natural alignment
25028 of the vector's element type. This is probably always going to be
25029 true in practice, since we've already established that this isn't a packed access. */
25031 return ((misalignment % align) == 0);
25034 return default_builtin_support_vector_misalignment (mode, type, misalignment,
25039 arm_conditional_register_usage (void)
25043 if (TARGET_THUMB1 && optimize_size)
25045 /* When optimizing for size on Thumb-1, it's better not
25046 to use the HI regs, because of the overhead of stacking them. */
25048 for (regno = FIRST_HI_REGNUM;
25049 regno <= LAST_HI_REGNUM; ++regno)
25050 fixed_regs[regno] = call_used_regs[regno] = 1;
25053 /* The link register can be clobbered by any branch insn,
25054 but we have no way to track that at present, so mark
25055 it as unavailable. */
25057 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
25059 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
25061 /* VFPv3 registers are disabled when earlier VFP
25062 versions are selected due to the definition of
25063 LAST_VFP_REGNUM. */
25064 for (regno = FIRST_VFP_REGNUM;
25065 regno <= LAST_VFP_REGNUM; ++ regno)
25067 fixed_regs[regno] = 0;
25068 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
25069 || regno >= FIRST_VFP_REGNUM + 32;
25073 if (TARGET_REALLY_IWMMXT)
25075 regno = FIRST_IWMMXT_GR_REGNUM;
25076 /* The 2002/10/09 revision of the XScale ABI has wCG0
25077 and wCG1 as call-preserved registers. The 2002/11/21
25078 revision changed this so that all wCG registers are
25079 scratch registers. */
25080 for (regno = FIRST_IWMMXT_GR_REGNUM;
25081 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
25082 fixed_regs[regno] = 0;
25083 /* The XScale ABI has wR0 - wR9 as scratch registers,
25084 the rest as call-preserved registers. */
25085 for (regno = FIRST_IWMMXT_REGNUM;
25086 regno <= LAST_IWMMXT_REGNUM; ++ regno)
25088 fixed_regs[regno] = 0;
25089 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
25093 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
25095 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25096 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25098 else if (TARGET_APCS_STACK)
25100 fixed_regs[10] = 1;
25101 call_used_regs[10] = 1;
25103 /* -mcaller-super-interworking reserves r11 for calls to
25104 _interwork_r11_call_via_rN(). Making the register global
25105 is an easy way of ensuring that it remains valid for all calls. */
25107 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
25108 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
25110 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25111 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25112 if (TARGET_CALLER_INTERWORKING)
25113 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25115 SUBTARGET_CONDITIONAL_REGISTER_USAGE
25119 arm_preferred_rename_class (reg_class_t rclass)
25121 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25122 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
25123 and code size can be reduced. */
25124 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
25130 /* Compute the attribute "length" of insn "*push_multi".
25131 So this function MUST be kept in sync with that insn pattern. */
25133 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
25135 int i, regno, hi_reg;
25136 int num_saves = XVECLEN (parallel_op, 0);
25146 regno = REGNO (first_op);
25147 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25148 for (i = 1; i < num_saves && !hi_reg; i++)
25150 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
25151 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25159 /* Compute the number of instructions emitted by output_move_double. */
25161 arm_count_output_move_double_insns (rtx *operands)
25165 /* output_move_double may modify the operands array, so call it
25166 here on a copy of the array. */
25167 ops[0] = operands[0];
25168 ops[1] = operands[1];
25169 output_move_double (ops, false, &count);
25174 vfp3_const_double_for_fract_bits (rtx operand)
25176 REAL_VALUE_TYPE r0;
25178 if (GET_CODE (operand) != CONST_DOUBLE)
25181 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
25182 if (exact_real_inverse (DFmode, &r0))
25184 if (exact_real_truncate (DFmode, &r0))
25186 HOST_WIDE_INT value = real_to_integer (&r0);
25187 value = value & 0xffffffff;
25188 if ((value != 0) && ((value & (value - 1)) == 0))
25189 return int_log2 (value);
25195 /* Emit a memory barrier around an atomic sequence according to MODEL. */
25198 arm_pre_atomic_barrier (enum memmodel model)
25200 if (need_atomic_barrier_p (model, true))
25201 emit_insn (gen_memory_barrier ());
25205 arm_post_atomic_barrier (enum memmodel model)
25207 if (need_atomic_barrier_p (model, false))
25208 emit_insn (gen_memory_barrier ());
25211 /* Emit the load-exclusive and store-exclusive instructions. */
25214 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
25216 rtx (*gen) (rtx, rtx);
25220 case QImode: gen = gen_arm_load_exclusiveqi; break;
25221 case HImode: gen = gen_arm_load_exclusivehi; break;
25222 case SImode: gen = gen_arm_load_exclusivesi; break;
25223 case DImode: gen = gen_arm_load_exclusivedi; break;
25225 gcc_unreachable ();
25228 emit_insn (gen (rval, mem));
25232 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
25234 rtx (*gen) (rtx, rtx, rtx);
25238 case QImode: gen = gen_arm_store_exclusiveqi; break;
25239 case HImode: gen = gen_arm_store_exclusivehi; break;
25240 case SImode: gen = gen_arm_store_exclusivesi; break;
25241 case DImode: gen = gen_arm_store_exclusivedi; break;
25243 gcc_unreachable ();
25246 emit_insn (gen (bval, rval, mem));
25249 /* Mark the previous jump instruction as unlikely. */
25252 emit_unlikely_jump (rtx insn)
25254 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
25256 insn = emit_jump_insn (insn);
25257 add_reg_note (insn, REG_BR_PROB, very_unlikely);
25260 /* Expand a compare and swap pattern. */
25263 arm_expand_compare_and_swap (rtx operands[])
25265 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
25266 enum machine_mode mode;
25267 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
25269 bval = operands[0];
25270 rval = operands[1];
25272 oldval = operands[3];
25273 newval = operands[4];
25274 is_weak = operands[5];
25275 mod_s = operands[6];
25276 mod_f = operands[7];
25277 mode = GET_MODE (mem);
25283 /* For narrow modes, we're going to perform the comparison in SImode,
25284 so do the zero-extension now. */
25285 rval = gen_reg_rtx (SImode);
25286 oldval = convert_modes (SImode, mode, oldval, true);
25290 /* Force the value into a register if needed. We waited until after
25291 the zero-extension above to do this properly. */
25292 if (!arm_add_operand (oldval, mode))
25293 oldval = force_reg (mode, oldval);
25297 if (!cmpdi_operand (oldval, mode))
25298 oldval = force_reg (mode, oldval);
25302 gcc_unreachable ();
25307 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
25308 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
25309 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
25310 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
25312 gcc_unreachable ();
25315 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
25317 if (mode == QImode || mode == HImode)
25318 emit_move_insn (operands[1], gen_lowpart (mode, rval));
25320 /* In all cases, we arrange for success to be signaled by Z set.
25321 This arrangement allows for the boolean result to be used directly
25322 in a subsequent branch, post optimization. */
25323 x = gen_rtx_REG (CCmode, CC_REGNUM);
25324 x = gen_rtx_EQ (SImode, x, const0_rtx);
25325 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
25328 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25329 another memory store between the load-exclusive and store-exclusive can
25330 reset the monitor from Exclusive to Open state. This means we must wait
25331 until after reload to split the pattern, lest we get a register spill in
25332 the middle of the atomic sequence. */
25335 arm_split_compare_and_swap (rtx operands[])
25337 rtx rval, mem, oldval, newval, scratch;
25338 enum machine_mode mode;
25339 enum memmodel mod_s, mod_f;
25341 rtx label1, label2, x, cond;
25343 rval = operands[0];
25345 oldval = operands[2];
25346 newval = operands[3];
25347 is_weak = (operands[4] != const0_rtx);
25348 mod_s = (enum memmodel) INTVAL (operands[5]);
25349 mod_f = (enum memmodel) INTVAL (operands[6]);
25350 scratch = operands[7];
25351 mode = GET_MODE (mem);
25353 arm_pre_atomic_barrier (mod_s);
25358 label1 = gen_label_rtx ();
25359 emit_label (label1);
25361 label2 = gen_label_rtx ();
25363 arm_emit_load_exclusive (mode, rval, mem);
25365 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
25366 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25367 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25368 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
25369 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25371 arm_emit_store_exclusive (mode, scratch, mem, newval);
25373 /* Weak or strong, we want EQ to be true for success, so that we
25374 match the flags that we got from the compare above. */
25375 cond = gen_rtx_REG (CCmode, CC_REGNUM);
25376 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
25377 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
25381 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25382 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25383 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
25384 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25387 if (mod_f != MEMMODEL_RELAXED)
25388 emit_label (label2);
25390 arm_post_atomic_barrier (mod_s);
25392 if (mod_f == MEMMODEL_RELAXED)
25393 emit_label (label2);
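/* The resulting instruction skeleton, for a strong SImode compare-and-swap
   with sequentially consistent ordering, is roughly:
	dmb
   1:	ldrex	rval, [mem]
	cmp	rval, oldval
	bne	2f		@ unlikely
	strex	scratch, newval, [mem]
	cmp	scratch, #0
	bne	1b		@ strong only
   2:	dmb
   A weak compare-and-swap omits the backward branch and lets the caller
   retry.  */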
25397 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
25398 rtx value, rtx model_rtx, rtx cond)
25400 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
25401 enum machine_mode mode = GET_MODE (mem);
25402 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
25405 arm_pre_atomic_barrier (model);
25407 label = gen_label_rtx ();
25408 emit_label (label);
25411 new_out = gen_lowpart (wmode, new_out);
25413 old_out = gen_lowpart (wmode, old_out);
25416 value = simplify_gen_subreg (wmode, value, mode, 0);
25418 arm_emit_load_exclusive (mode, old_out, mem);
25427 x = gen_rtx_AND (wmode, old_out, value);
25428 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25429 x = gen_rtx_NOT (wmode, new_out);
25430 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25434 if (CONST_INT_P (value))
25436 value = GEN_INT (-INTVAL (value));
25442 if (mode == DImode)
25444 /* DImode plus/minus need to clobber flags. */
25445 /* The adddi3 and subdi3 patterns are incorrectly written so that
25446 they require matching operands, even when we could easily support
25447 three operands. Thankfully, this can be fixed up post-splitting,
25448 as the individual add+adc patterns do accept three operands and
25449 post-reload cprop can make these moves go away. */
25450 emit_move_insn (new_out, old_out);
25452 x = gen_adddi3 (new_out, new_out, value);
25454 x = gen_subdi3 (new_out, new_out, value);
25461 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25462 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25466 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25468 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25469 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25471 arm_post_atomic_barrier (model);
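/* So an atomic add, for instance, becomes a loop of the shape:
   1:	ldrex	old, [mem]
	add	new, old, value
	strex	cond, new, [mem]
	cmp	cond, #0
	bne	1b
   bracketed by whatever barriers the memory model requires.  */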
25474 #define MAX_VECT_LEN 16
25476 struct expand_vec_perm_d
25478 rtx target, op0, op1;
25479 unsigned char perm[MAX_VECT_LEN];
25480 enum machine_mode vmode;
25481 unsigned char nelt;
25486 /* Generate a variable permutation. */
25489 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25491 enum machine_mode vmode = GET_MODE (target);
25492 bool one_vector_p = rtx_equal_p (op0, op1);
25494 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25495 gcc_checking_assert (GET_MODE (op0) == vmode);
25496 gcc_checking_assert (GET_MODE (op1) == vmode);
25497 gcc_checking_assert (GET_MODE (sel) == vmode);
25498 gcc_checking_assert (TARGET_NEON);
25502 if (vmode == V8QImode)
25503 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25505 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25511 if (vmode == V8QImode)
25513 pair = gen_reg_rtx (V16QImode);
25514 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25515 pair = gen_lowpart (TImode, pair);
25516 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25520 pair = gen_reg_rtx (OImode);
25521 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25522 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25528 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25530 enum machine_mode vmode = GET_MODE (target);
25531 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25532 bool one_vector_p = rtx_equal_p (op0, op1);
25533 rtx rmask[MAX_VECT_LEN], mask;
25535 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25536 numbering of elements for big-endian, we must reverse the order. */
25537 gcc_checking_assert (!BYTES_BIG_ENDIAN);
25539 /* The VTBL instruction does not use a modulo index, so we must take care
25540 of that ourselves. */
25541 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25542 for (i = 0; i < nelt; ++i)
25544 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25545 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25547 arm_expand_vec_perm_1 (target, op0, op1, sel);
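/* E.g. for a two-operand V8QImode permute the mask is 15, so a selector
   element of 17 behaves exactly like 1 (the second byte of op0), giving
   the modulo semantics that VEC_PERM_EXPR requires.  */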
25550 /* Generate or test for an insn that supports a constant permutation. */
25552 /* Recognize patterns for the VUZP insns. */
25555 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25557 unsigned int i, odd, mask, nelt = d->nelt;
25558 rtx out0, out1, in0, in1, x;
25559 rtx (*gen)(rtx, rtx, rtx, rtx);
25561 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25564 /* Note that these are little-endian tests. Adjust for big-endian later. */
25565 if (d->perm[0] == 0)
25567 else if (d->perm[0] == 1)
25571 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25573 for (i = 0; i < nelt; i++)
25575 unsigned elt = (i * 2 + odd) & mask;
25576 if (d->perm[i] != elt)
25586 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25587 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
25588 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
25589 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
25590 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
25591 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
25592 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
25593 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
25595 gcc_unreachable ();
25600 if (BYTES_BIG_ENDIAN)
25602 x = in0, in0 = in1, in1 = x;
25607 out1 = gen_reg_rtx (d->vmode);
25609 x = out0, out0 = out1, out1 = x;
25611 emit_insn (gen (out0, in0, in1, out1));
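/* A concrete match: for V4SImode, the selector {0, 2, 4, 6} (odd == 0)
   picks the even-numbered elements of the two inputs, which is exactly
   what one output of VUZP produces.  */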
25615 /* Recognize patterns for the VZIP insns. */
25618 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25620 unsigned int i, high, mask, nelt = d->nelt;
25621 rtx out0, out1, in0, in1, x;
25622 rtx (*gen)(rtx, rtx, rtx, rtx);
25624 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25627 /* Note that these are little-endian tests. Adjust for big-endian later. */
25629 if (d->perm[0] == high)
25631 else if (d->perm[0] == 0)
25635 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25637 for (i = 0; i < nelt / 2; i++)
25639 unsigned elt = (i + high) & mask;
25640 if (d->perm[i * 2] != elt)
25642 elt = (elt + nelt) & mask;
25643 if (d->perm[i * 2 + 1] != elt)
25653 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25654 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
25655 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
25656 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
25657 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
25658 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
25659 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
25660 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
25662 gcc_unreachable ();
25667 if (BYTES_BIG_ENDIAN)
25669 x = in0, in0 = in1, in1 = x;
25674 out1 = gen_reg_rtx (d->vmode);
25676 x = out0, out0 = out1, out1 = x;
25678 emit_insn (gen (out0, in0, in1, out1));
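/* A concrete match: for V4SImode, the selector {0, 4, 1, 5} (high == 0)
   interleaves the low halves of the two inputs, which is one output of
   VZIP.  */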
25682 /* Recognize patterns for the VREV insns. */
25685 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25687 unsigned int i, j, diff, nelt = d->nelt;
25688 rtx (*gen)(rtx, rtx, rtx);
25690 if (!d->one_vector_p)
25699 case V16QImode: gen = gen_neon_vrev64v16qi; break;
25700 case V8QImode: gen = gen_neon_vrev64v8qi; break;
25708 case V16QImode: gen = gen_neon_vrev32v16qi; break;
25709 case V8QImode: gen = gen_neon_vrev32v8qi; break;
25710 case V8HImode: gen = gen_neon_vrev64v8hi; break;
25711 case V4HImode: gen = gen_neon_vrev64v4hi; break;
25719 case V16QImode: gen = gen_neon_vrev16v16qi; break;
25720 case V8QImode: gen = gen_neon_vrev16v8qi; break;
25721 case V8HImode: gen = gen_neon_vrev32v8hi; break;
25722 case V4HImode: gen = gen_neon_vrev32v4hi; break;
25723 case V4SImode: gen = gen_neon_vrev64v4si; break;
25724 case V2SImode: gen = gen_neon_vrev64v2si; break;
25725 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
25726 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
25735 for (i = 0; i < nelt ; i += diff + 1)
25736 for (j = 0; j <= diff; j += 1)
25738 /* This is guaranteed to be true as the value of diff
25739 is 7, 3, 1 and we should have enough elements in the
25740 queue to generate this. Getting a vector mask with a
25741 value of diff other than these values implies that
25742 something is wrong by the time we get here. */
25743 gcc_assert (i + j < nelt);
25744 if (d->perm[i + j] != i + diff - j)
25752 /* ??? The third operand is an artifact of the builtin infrastructure
25753 and is ignored by the actual instruction. */
25754 emit_insn (gen (d->target, d->op0, const0_rtx));
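/* A concrete match: for V8QImode with diff == 1 the selector
   {1, 0, 3, 2, 5, 4, 7, 6} swaps adjacent bytes, which is vrev16.8.  */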
25758 /* Recognize patterns for the VTRN insns. */
25761 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25763 unsigned int i, odd, mask, nelt = d->nelt;
25764 rtx out0, out1, in0, in1, x;
25765 rtx (*gen)(rtx, rtx, rtx, rtx);
25767 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25770 /* Note that these are little-endian tests. Adjust for big-endian later. */
25771 if (d->perm[0] == 0)
25773 else if (d->perm[0] == 1)
25777 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25779 for (i = 0; i < nelt; i += 2)
25781 if (d->perm[i] != i + odd)
25783 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
25793 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
25794 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
25795 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
25796 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
25797 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
25798 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
25799 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
25800 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
25802 gcc_unreachable ();
25807 if (BYTES_BIG_ENDIAN)
25809 x = in0, in0 = in1, in1 = x;
25814 out1 = gen_reg_rtx (d->vmode);
25816 x = out0, out0 = out1, out1 = x;
25818 emit_insn (gen (out0, in0, in1, out1));
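/* A concrete match: for V4SImode, the selector {0, 4, 2, 6} (odd == 0)
   is one output of VTRN, which transposes the 2x2 element blocks of the
   two inputs.  */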
25822 /* The NEON VTBL instruction is a fully variable permutation that's even
25823 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
25824 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
25825 can do slightly better by expanding this as a constant where we don't
25826 have to apply a mask. */
25829 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
25831 rtx rperm[MAX_VECT_LEN], sel;
25832 enum machine_mode vmode = d->vmode;
25833 unsigned int i, nelt = d->nelt;
25835 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25836 numbering of elements for big-endian, we must reverse the order. */
25837 if (BYTES_BIG_ENDIAN)
25843 /* Generic code will try constant permutation twice. Once with the
25844 original mode and again with the elements lowered to QImode.
25845 So wait and don't do the selector expansion ourselves. */
25846 if (vmode != V8QImode && vmode != V16QImode)
25849 for (i = 0; i < nelt; ++i)
25850 rperm[i] = GEN_INT (d->perm[i]);
25851 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
25852 sel = force_reg (vmode, sel);
25854 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
25859 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
25861 /* The pattern matching functions above are written to look for a small
25862 number to begin the sequence (0, 1, N/2). If we begin with an index
25863 from the second operand, we can swap the operands. */
25864 if (d->perm[0] >= d->nelt)
25866 unsigned i, nelt = d->nelt;
25869 for (i = 0; i < nelt; ++i)
25870 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
25879 if (arm_evpc_neon_vuzp (d))
25881 if (arm_evpc_neon_vzip (d))
25883 if (arm_evpc_neon_vrev (d))
25885 if (arm_evpc_neon_vtrn (d))
25887 return arm_evpc_neon_vtbl (d);
25892 /* Expand a vec_perm_const pattern. */
25895 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
25897 struct expand_vec_perm_d d;
25898 int i, nelt, which;
25904 d.vmode = GET_MODE (target);
25905 gcc_assert (VECTOR_MODE_P (d.vmode));
25906 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25907 d.testing_p = false;
25909 for (i = which = 0; i < nelt; ++i)
25911 rtx e = XVECEXP (sel, 0, i);
25912 int ei = INTVAL (e) & (2 * nelt - 1);
25913 which |= (ei < nelt ? 1 : 2);
25923 d.one_vector_p = false;
25924 if (!rtx_equal_p (op0, op1))
25927 /* The elements of PERM do not suggest that only the first operand
25928 is used, but both operands are identical. Allow easier matching
25929 of the permutation by folding the permutation into the single operand itself. */
25933 for (i = 0; i < nelt; ++i)
25934 d.perm[i] &= nelt - 1;
25936 d.one_vector_p = true;
25941 d.one_vector_p = true;
25945 return arm_expand_vec_perm_const_1 (&d);
25948 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
25951 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
25952 const unsigned char *sel)
25954 struct expand_vec_perm_d d;
25955 unsigned int i, nelt, which;
25959 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25960 d.testing_p = true;
25961 memcpy (d.perm, sel, nelt);
25963 /* Categorize the set of elements in the selector. */
25964 for (i = which = 0; i < nelt; ++i)
25966 unsigned char e = d.perm[i];
25967 gcc_assert (e < 2 * nelt);
25968 which |= (e < nelt ? 1 : 2);
25971 /* For all elements from second vector, fold the elements to first. */
25973 for (i = 0; i < nelt; ++i)
25976 /* Check whether the mask can be applied to the vector type. */
25977 d.one_vector_p = (which != 3);
25979 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
25980 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
25981 if (!d.one_vector_p)
25982 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
25985 ret = arm_expand_vec_perm_const_1 (&d);
25992 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
25994 /* If we are soft float and either have ldrd or the mode fits in a
25995 single word, then all auto-increment forms are ok. */
25996 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
26001 /* Post increment and Pre Decrement are supported for all
26002 instruction forms except for vector forms. */
26005 if (VECTOR_MODE_P (mode))
26007 if (code != ARM_PRE_DEC)
26017 /* Without LDRD, if the mode size is greater than the
26018 word size there is no point in auto-incrementing,
26019 because ldm and stm will not have these forms. */
26020 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
26023 /* Vector and floating point modes do not support
26024 these auto increment forms. */
26025 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
26038 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
26039 on ARM, since we know that shifts by negative amounts are no-ops.
26040 Additionally, the default expansion code is not available or suitable
26041 for post-reload insn splits (this can occur when the register allocator
26042 chooses not to do a shift in NEON).
26044 This function is used in both initial expand and post-reload splits, and
26045 handles all kinds of 64-bit shifts.
26047 Input requirements:
26048 - It is safe for the input and output to be the same register, but
26049 early-clobber rules apply for the shift amount and scratch registers.
26050 - Shift by register requires both scratch registers. Shift by a constant
26051 less than 32 in Thumb2 mode requires SCRATCH1 only. In all other cases
26052 the scratch registers may be NULL.
26053 - Ashiftrt by a register also clobbers the CC register. */
26055 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
26056 rtx amount, rtx scratch1, rtx scratch2)
26058 rtx out_high = gen_highpart (SImode, out);
26059 rtx out_low = gen_lowpart (SImode, out);
26060 rtx in_high = gen_highpart (SImode, in);
26061 rtx in_low = gen_lowpart (SImode, in);
/* Terminology:
26064 in = the register pair containing the input value.
26065 out = the destination register pair.
26066 up = the high- or low-part of each pair.
26067 down = the opposite part to "up".
26068 In a shift, we can consider bits to shift from "up"-stream to
26069 "down"-stream, so in a left-shift "up" is the low-part and "down"
26070 is the high-part of each register pair. */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up    = code == ASHIFT ? in_low : in_high;
  rtx in_down  = code == ASHIFT ? in_high : in_low;
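
  /* For example (illustrative): in a left shift by one, bit 31 of the
     low ("up") word moves into bit 0 of the high ("down") word, so the
     bits flow from the up-stream part into the down-stream part.  */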

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
              && (REG_P (out) || GET_CODE (out) == SUBREG)
              && GET_MODE (out) == DImode);
  gcc_assert (in
              && (REG_P (in) || GET_CODE (in) == SUBREG)
              && GET_MODE (in) == DImode);
  gcc_assert (amount
              && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
                   && GET_MODE (amount) == SImode)
                  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
              || (GET_CODE (scratch1) == SCRATCH)
              || (GET_MODE (scratch1) == SImode
                  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
              || (GET_CODE (scratch2) == SCRATCH)
              || (GET_MODE (scratch2) == SImode
                  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
              || !HARD_REGISTER_P (out)
              || (REGNO (out) != REGNO (amount)
                  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
            gen_addsi3 ((DEST), (SRC), gen_rtx_CONST_INT (VOIDmode, -32))
  #define RSB_32(DEST,SRC) \
            gen_subsi3 ((DEST), gen_rtx_CONST_INT (VOIDmode, 32), (SRC))
  #define SUB_S_32(DEST,SRC) \
            gen_addsi3_compare0 ((DEST), (SRC), \
                                 gen_rtx_CONST_INT (VOIDmode, -32))
  #define SET(DEST,SRC) \
            gen_rtx_SET (SImode, (DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
                            SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
                            SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
            gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
            gen_arm_cond_branch ((LABEL), \
                                 gen_rtx_ ## COND (CCmode, cc_reg, \
                                                   const0_rtx), \
                                 cc_reg)
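
  /* Note (an observation, not from the original comments): SUB_32 builds
     DEST = SRC - 32 using gen_addsi3 with a -32 constant because canonical
     RTL expresses subtraction of a constant as addition of its negation,
     whereas RSB_32 (32 - SRC) genuinely needs gen_subsi3.  */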

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
         In both cases we try to match the result an ARM instruction in a
         shift-by-register would give.  This helps reduce execution
         differences between optimization levels, but it won't stop other
         parts of the compiler doing different things.  This is "undefined"
         behaviour, in any case.  */
      if (INTVAL (amount) <= 0)
        emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
        {
          if (code == ASHIFTRT)
            {
              rtx const31_rtx = gen_rtx_CONST_INT (VOIDmode, 31);
              emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
              emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
            }
          else
            emit_insn (gen_movdi (out, const0_rtx));
        }
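
      /* E.g. (illustrative): a 64-bit ASHIFTRT by 100 leaves both result
         words equal to the sign fill in_up >> 31, matching what an ARM
         shift-by-register sequence would produce for that amount.  */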

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
        {
          /* Shifts by a constant less than 32.  */
          rtx reverse_amount = gen_rtx_CONST_INT (VOIDmode,
                                                  32 - INTVAL (amount));

          emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
          emit_insn (SET (out_down,
                          ORR (REV_LSHIFT (code, in_up, reverse_amount),
                               out_down)));
          emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
        }
      else
        {
          /* Shifts by a constant greater than 31.  */
          rtx adj_amount = gen_rtx_CONST_INT (VOIDmode, INTVAL (amount) - 32);

          emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
          if (code == ASHIFTRT)
            emit_insn (gen_ashrsi3 (out_up, in_up,
                                    gen_rtx_CONST_INT (VOIDmode, 31)));
          else
            emit_insn (SET (out_up, const0_rtx));
        }
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
         Swapping them around now allows the later code to be more general.  */
      switch (code)
        {
        case ASHIFT:
          emit_insn (SUB_32 (scratch1, amount));
          emit_insn (RSB_32 (scratch2, amount));
          break;

        case ASHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          /* Also set CC from amount - 32, so that the LT branches below
             fire exactly when amount < 32.  */
          emit_insn (SUB_S_32 (scratch2, amount));
          break;

        case LSHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          emit_insn (SUB_32 (scratch2, amount));
          break;

        default:
          gcc_unreachable ();
        }
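
      /* A note on why this works (not from the original comments): ARM
         register-specified shifts use only the bottom byte of the shift
         register, and LSL/LSR by 32 or more produce zero, so ORR-ing in a
         shift by a negative scratch value is harmless for the logical
         cases.  ASR by 32 or more instead fills with the sign bit, which
         is why only the ASHIFTRT paths below need a conditional branch.  */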

      /* Emit code like this:

         ASHIFT:
            out_down = in_down << amount;
            out_down = (in_up << (amount - 32)) | out_down;
            out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
            out_up = in_up << amount;

         ASHIFTRT:
            out_down = in_down >> amount;
            out_down = (in_up << (32 - amount)) | out_down;
            if (amount >= 32)
              out_down = ((signed)in_up >> (amount - 32)) | out_down;
            out_up = in_up >> amount;

         LSHIFTRT:
            out_down = in_down >> amount;
            out_down = (in_up << (32 - amount)) | out_down;
            if (amount >= 32)
              out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
            out_up = in_up >> amount;

         The ARM and Thumb2 variants are the same but implemented slightly
         differently.  If this were only called during expand we could just
         use the Thumb2 case and let combine do the right thing, but this
         can also be called from post-reload splitters.  */
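
      /* As a concrete sketch (assumed mnemonics, not emitted literally),
         the ARM-mode LSHIFTRT sequence corresponds to:

            lsr  out_down, in_down, amount
            orr  out_down, out_down, in_up, lsl scratch1  @ 32 - amount
            orr  out_down, out_down, in_up, lsr scratch2  @ amount - 32
            lsr  out_up, in_up, amount  */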

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
        {
          /* Emit code for ARM mode.  */
          emit_insn (SET (out_down,
                          ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
          if (code == ASHIFTRT)
            {
              rtx done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
                                             out_down)));
              emit_label (done_label);
            }
          else
            emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
                                           out_down)));
        }
      else
        {
          /* Emit code for Thumb2 mode.
             Thumb2 can't do shift and or in one insn.  */
          emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
          emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

          if (code == ASHIFTRT)
            {
              rtx done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
              emit_insn (SET (out_down, ORR (out_down, scratch2)));
              emit_label (done_label);
            }
          else
            {
              emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
              emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
            }
        }

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
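
/* Usage sketch (hypothetical operands, following the input requirements
   stated above): in ARM mode a constant shift needs no scratches, e.g.

     arm_emit_coreregs_64bit_shift (ASHIFT, out, in, GEN_INT (5),
                                    NULL, NULL);

   whereas a shift by register must pass both scratch registers, and for
   ASHIFTRT the caller's pattern must also allow for the CC clobber.  */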

/* Return true if COMPARISON is a comparison that can be handled, and
   force the operands into a form that the comparison patterns accept.  */
bool
arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  enum rtx_code canonical_code;
  enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  canonical_code = arm_canonicalize_comparison (code, op1, op2);
  PUT_CODE (*comparison, canonical_code);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
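
/* A typical caller shape (a sketch with assumed operand numbering; the
   actual machine-description expanders may differ):

     if (!arm_validize_comparison (&operands[0], &operands[1], &operands[2]))
       FAIL;

   i.e. reject UNEQ/LTGT outright, and otherwise rely on the operands
   having been forced into registers where the predicates required it.  */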

#include "gt-arm.h"