/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993-2013 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include <sstream>
#include <vector>
#include <algorithm>

#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "sched-int.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
#include "tree-pass.h"
#include "pass_manager.h"

int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
                                 && ((HOST_WIDE_INT)(VALUE)) <= 511)

#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
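
/* For instance, CONST_OK_FOR_ADD (300) holds only on SHmedia, since the
   I10 immediate range is -512..511 while the I08 immediate used on the
   other targets only covers -128..127; CONST_OK_FOR_ADD (100) holds on
   all targets.  */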
/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
                  : DECL_ATTRIBUTES (decl) \
                    ? (DECL_ATTRIBUTES (decl)) \
                    : TYPE_ATTRIBUTES (TREE_TYPE (decl))
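
/* E.g. for a FUNCTION_DECL, SH_ATTRIBUTES yields its DECL_ATTRIBUTES when
   present and otherwise falls back to the attributes of the decl's type;
   for a type node it simply yields TYPE_ATTRIBUTES.  */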
/* Set to 1 by expand_prologue () when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Unique number for UNSPEC_BBR pattern.  */
static unsigned int unspec_bbr_uid = 1;
/* Provides the class number of the smallest class containing
   reg number.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static void split_branches (rtx);
static int branch_dest (rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static bool broken_move (rtx);
static bool mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static bool noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void sh_option_override (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static bool sh_frame_pointer_required (void);
static rtx mark_constant_pool_use (rtx);
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
                                                   int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
                                                 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
                                                           tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_print_operand (FILE *, rtx, int);
static void sh_print_operand_address (FILE *, rtx);
static bool sh_print_operand_punct_valid_p (unsigned char code);
static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static bool high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static reg_class_t sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static tree sh_builtin_decl (unsigned, bool);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                HOST_WIDE_INT, tree);
static void sh_file_start (void);
static bool flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int and_xor_ior_costs (rtx, int);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
                                        enum machine_mode,
                                        struct secondary_reload_info *);
static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sh_delegitimize_address (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
                                                struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static rtx sh_function_value (const_tree, const_tree, bool);
static bool sh_function_value_regno_p (const unsigned int);
static rtx sh_libcall_value (enum machine_mode, const_rtx);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
                                       tree, int *, int);
static bool sh_strict_argument_naming (cumulative_args_t);
static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool sh_promote_prototypes (const_tree);
static enum machine_mode sh_promote_function_mode (const_tree type,
                                                   enum machine_mode,
                                                   int *punsignedp,
                                                   const_tree funtype,
                                                   int for_return);
static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
                                  const_tree, bool);
static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
                              const_tree, bool);
static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                 tree, bool);
static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
                                     const_tree, bool);
static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
                            const_tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static bool sh2a_function_vector_p (tree);
static void sh_trampoline_init (rtx, tree, rtx);
static rtx sh_trampoline_adjust_address (rtx);
static void sh_conditional_register_usage (void);
static bool sh_legitimate_constant_p (enum machine_mode, rtx);
static int mov_insn_size (enum machine_mode, bool);
static int max_mov_insn_displacement (enum machine_mode, bool);
static int mov_insn_alignment_mask (enum machine_mode, bool);
static HOST_WIDE_INT disp_addr_displacement (rtx);
static bool sequence_insn_p (rtx);
static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
                                        enum machine_mode, bool);
static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);

static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
static const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "interrupt_handler", 0, 0, true, false, false,
    sh_handle_interrupt_handler_attribute, false },
  { "sp_switch", 1, 1, true, false, false,
    sh_handle_sp_switch_attribute, false },
  { "trap_exit", 1, 1, true, false, false,
    sh_handle_trap_exit_attribute, false },
  { "renesas", 0, 0, false, true, false,
    sh_handle_renesas_attribute, false },
  { "trapa_handler", 0, 0, true, false, false,
    sh_handle_interrupt_handler_attribute, false },
  { "nosave_low_regs", 0, 0, true, false, false,
    sh_handle_interrupt_handler_attribute, false },
  { "resbank", 0, 0, true, false, false,
    sh_handle_resbank_handler_attribute, false },
  { "function_vector", 1, 1, true, false, false,
    sh2a_handle_function_vector_handler_attribute, false },
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sh_option_override

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sh_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sh_register_move_cost

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much like what
   find_insn_reg_weights does.

   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued first.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
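
/* A rough sketch of how these hooks cooperate over one scheduling region
   (pseudo-code only; see the actual hook implementations below for the
   details):

     sh_md_init_global ()     compute regmode_weight[] for all insns
     loop over cycles:
       sh_reorder (ready)     if high_pressure (mode), sort the ready
                              queue so the lowest-LUID insn goes first
       sh_variable_issue ()   update CURR_REGMODE_PRESSURE and cache
                              can_issue_more
       sh_reorder2 ()         if pressure is still high, set skip_cycles
       sh_dfa_new_cycle ()    honour skip_cycles (up to 8) so stalled
                              insns can migrate from Q to R
     sh_md_finish_global ()   release the weight arrays  */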
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved
#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sh_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sh_function_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sh_libcall_value
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sh_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE)\
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE)\
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
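
/* E.g. INSN_REGMODE_WEIGHT (insn, SImode) reads the SImode weight that
   find_regmode_weight recorded for INSN; SFmode weights live in the
   second row of the array.  */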
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sh_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)

/* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80.  This value
   is used by optabs.c atomic op expansion code as well as in sync.md.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
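
/* Consequently, after a successful __atomic_test_and_set the memory byte
   reads back as 0x80, not 1; comparisons in expanded atomic sequences
   must test against this value rather than against 1.  */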
struct gcc_target targetm = TARGET_INITIALIZER;

/* Information on the currently selected atomic model.
   This is initialized in sh_option_override.  */
static sh_atomic_model selected_atomic_model_;

const sh_atomic_model&
selected_atomic_model (void)
{
  return selected_atomic_model_;
}
static sh_atomic_model
parse_validate_atomic_model_option (const char* str)
{
  const char* model_names[sh_atomic_model::num_models];
  model_names[sh_atomic_model::none] = "none";
  model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
  model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
  model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
  model_names[sh_atomic_model::soft_imask] = "soft-imask";

  const char* model_cdef_names[sh_atomic_model::num_models];
  model_cdef_names[sh_atomic_model::none] = "NONE";
  model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
  model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
  model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
  model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";

  sh_atomic_model ret;
  ret.type = sh_atomic_model::none;
  ret.name = model_names[sh_atomic_model::none];
  ret.cdef_name = model_cdef_names[sh_atomic_model::none];
  ret.strict = false;
  ret.tcb_gbr_offset = -1;

  /* Handle empty string as 'none'.  */
  if (str == NULL || *str == '\0')
    return ret;

#define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)

  std::vector<std::string> tokens;
  for (std::stringstream ss (str); ss.good (); )
    {
      tokens.push_back (std::string ());
      std::getline (ss, tokens.back (), ',');
    }

  if (tokens.empty ())
    err_ret ("invalid atomic model option");

  /* The first token must be the atomic model name.  */
  {
    for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
      if (tokens.front () == model_names[i])
        {
          ret.type = (sh_atomic_model::enum_type)i;
          ret.name = model_names[i];
          ret.cdef_name = model_cdef_names[i];
          goto got_mode_name;
        }

    err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
got_mode_name:;
  }

  /* Go through the remaining tokens.  */
  for (size_t i = 1; i < tokens.size (); ++i)
    {
      if (tokens[i] == "strict")
        ret.strict = true;
      else if (tokens[i].find ("gbr-offset=") == 0)
        {
          std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
          ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
          if (offset_str.empty () || ret.tcb_gbr_offset == -1)
            err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
                     "option", offset_str.c_str ());
        }
      else
        err_ret ("unknown parameter \"%s\" in atomic model option",
                 tokens[i].c_str ());
    }

  /* Check that the selection makes sense.  */
  if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
    err_ret ("atomic operations are not supported on SHmedia");

  if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
    err_ret ("atomic model %s is only available on SH3 and SH4 targets",
             ret.name);

  if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
    err_ret ("atomic model %s is only available on SH4A targets", ret.name);

  if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
    err_ret ("atomic model %s requires gbr-offset parameter", ret.name);

  if (ret.type == sh_atomic_model::soft_tcb
      && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
          || (ret.tcb_gbr_offset & 3) != 0))
    err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
             "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
             ret.name);

  if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
    err_ret ("cannot use atomic model %s in user mode", ret.name);

  return ret;

#undef err_ret
}
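
/* For example, "-matomic-model=soft-tcb,gbr-offset=16" parses into
   { type = soft_tcb, name = "soft-tcb", tcb_gbr_offset = 16 }, while
   plain "-matomic-model=soft-tcb" is rejected above because the soft-tcb
   model requires the gbr-offset parameter.  */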
/* Register SH specific RTL passes.  */
extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
                                            const char* name);
static void
register_sh_passes (void)
{
  if (!TARGET_SH1)
    return;

/* Running the sh_treg_combine pass after ce1 generates better code when
   comparisons are combined and reg-reg moves are introduced, because
   reg-reg moves will be eliminated afterwards.  However, there are quite
   some cases where combine will be unable to fold comparison related insns,
   thus for now don't do it.
  register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
                 PASS_POS_INSERT_AFTER, "ce1", 1);
*/

  /* Run sh_treg_combine pass after combine but before register allocation.  */
  register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
                 PASS_POS_INSERT_AFTER, "split1", 1);

  /* Run sh_treg_combine pass after register allocation and basic block
     reordering as this sometimes creates new opportunities.  */
  register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
                 PASS_POS_INSERT_AFTER, "split4", 1);
}
/* Implement TARGET_OPTION_OVERRIDE macro.  Validate and override
   various options, and do some machine dependent initialization.  */
static void
sh_option_override (void)
{
  int regno;

  SUBTARGET_OVERRIDE_OPTIONS;
  if (optimize > 1 && !optimize_size)
    target_flags |= MASK_SAVE_ALL_TARGET_REGS;
  sh_cpu = PROCESSOR_SH1;
  assembler_dialect = 0;
  if (TARGET_SH2)
    sh_cpu = PROCESSOR_SH2;
  if (TARGET_SH2E)
    sh_cpu = PROCESSOR_SH2E;
  if (TARGET_SH2A)
    sh_cpu = PROCESSOR_SH2A;
  if (TARGET_SH3)
    sh_cpu = PROCESSOR_SH3;
  if (TARGET_SH3E)
    sh_cpu = PROCESSOR_SH3E;
  if (TARGET_SH4)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4;
    }
  if (TARGET_SH4A_ARCH)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4A;
    }
  if (TARGET_SH5)
    {
      sh_cpu = PROCESSOR_SH5;
      target_flags |= MASK_ALIGN_DOUBLE;
      if (TARGET_SHMEDIA_FPU)
        target_flags |= MASK_FMOVD;
      if (TARGET_SHMEDIA)
        {
          /* There are no delay slots on SHmedia.  */
          flag_delayed_branch = 0;
          /* Relaxation isn't yet supported for SHmedia.  */
          target_flags &= ~MASK_RELAX;
          /* After reload, if conversion does little good but can cause
             ICEs:
             - find_if_block doesn't do anything for SH because we don't
               have conditional execution patterns.  (We use conditional
               move patterns, which are handled differently, and only
               accept SImode, SFmode and DFmode).
             - find_cond_trap doesn't do anything for the SH because we
               don't have conditional traps.
             - find_if_case_1 uses redirect_edge_and_branch_force in
               the only path that does an optimization, and this causes
               an ICE when branch targets are in registers.
             - find_if_case_2 doesn't do anything for the SHmedia after
               reload except when it can redirect a tablejump - and
               that's rather rare.  */
          flag_if_conversion2 = 0;
          if (! strcmp (sh_div_str, "call"))
            sh_div_strategy = SH_DIV_CALL;
          else if (! strcmp (sh_div_str, "call2"))
            sh_div_strategy = SH_DIV_CALL2;
          if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
            sh_div_strategy = SH_DIV_FP;
          else if (! strcmp (sh_div_str, "inv"))
            sh_div_strategy = SH_DIV_INV;
          else if (! strcmp (sh_div_str, "inv:minlat"))
            sh_div_strategy = SH_DIV_INV_MINLAT;
          else if (! strcmp (sh_div_str, "inv20u"))
            sh_div_strategy = SH_DIV_INV20U;
          else if (! strcmp (sh_div_str, "inv20l"))
            sh_div_strategy = SH_DIV_INV20L;
          else if (! strcmp (sh_div_str, "inv:call2"))
            sh_div_strategy = SH_DIV_INV_CALL2;
          else if (! strcmp (sh_div_str, "inv:call"))
            sh_div_strategy = SH_DIV_INV_CALL;
          else if (! strcmp (sh_div_str, "inv:fp"))
            {
              if (TARGET_FPU_ANY)
                sh_div_strategy = SH_DIV_INV_FP;
              else
                sh_div_strategy = SH_DIV_INV;
            }
          TARGET_CBRANCHDI4 = 0;
          /* Assembler CFI isn't yet fully supported for SHmedia.  */
          flag_dwarf2_cfi_asm = 0;
        }
    }
  else
    {
      /* Only the sh64-elf assembler fully supports .quad properly.  */
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
  if (TARGET_SH1)
    {
      if (! strcmp (sh_div_str, "call-div1"))
        sh_div_strategy = SH_DIV_CALL_DIV1;
      else if (! strcmp (sh_div_str, "call-fp")
               && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
                   || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
        sh_div_strategy = SH_DIV_CALL_FP;
      else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
        sh_div_strategy = SH_DIV_CALL_TABLE;
      else
        /* Pick one that makes most sense for the target in general.
           It is not much good to use different functions depending
           on -Os, since then we'll end up with two different functions
           when some of the code is compiled for size, and some for
           speed.  */

        /* SH4 tends to emphasize speed.  */
        if (TARGET_HARD_SH4)
          sh_div_strategy = SH_DIV_CALL_TABLE;
        /* These have their own way of doing things.  */
        else if (TARGET_SH2A)
          sh_div_strategy = SH_DIV_INTRINSIC;
        /* ??? Should we use the integer SHmedia function instead?  */
        else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
          sh_div_strategy = SH_DIV_CALL_FP;
        /* SH1 .. SH3 cores often go into small-footprint systems, so
           default to the smallest implementation available.  */
        else
          sh_div_strategy = SH_DIV_CALL_DIV1;
    }

  if (!TARGET_SH1)
    TARGET_PRETEND_CMOVE = 0;
  if (sh_divsi3_libfunc[0])
    ; /* User supplied - leave it alone.  */
  else if (TARGET_DIVIDE_CALL_FP)
    sh_divsi3_libfunc = "__sdivsi3_i4";
  else if (TARGET_DIVIDE_CALL_TABLE)
    sh_divsi3_libfunc = "__sdivsi3_i4i";
  else if (TARGET_SH5)
    sh_divsi3_libfunc = "__sdivsi3_1";
  else
    sh_divsi3_libfunc = "__sdivsi3";
  if (sh_branch_cost == -1)
    {
      sh_branch_cost = 1;

      /* The SH1 does not have delay slots, hence we get a pipeline stall
         at every branch.  The SH4 is superscalar, so the single delay slot
         is not sufficient to keep both pipelines filled.  */
      if (! TARGET_SH2 || TARGET_HARD_SH4)
        sh_branch_cost = 2;
    }

  /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user.  */
  if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
    TARGET_ZDCBRANCH = 1;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      sh_register_names[regno][0] = '\0';

  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
      sh_additional_register_names[regno][0] = '\0';
  if ((flag_pic && ! TARGET_PREFERGOT)
      || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
    flag_no_function_cse = 1;

  if (targetm.small_register_classes_for_mode_p (VOIDmode))
    {
      /* Never run scheduling before reload, since that can
         break global alloc, and generates slower code anyway due
         to the pressure on R0.  */
      /* Enable sched1 for SH4 if the user explicitly requests.
         When sched1 is enabled, the ready queue will be reordered by
         the target hooks if pressure is high.  We can not do this for
         PIC, SH3 and lower as they give spill failures for R0.  */
      if (!TARGET_HARD_SH4 || flag_pic)
        flag_schedule_insns = 0;
      /* ??? Current exception handling places basic block boundaries
         after call_insns.  It causes the high pressure on R0 and gives
         spill failures for R0 in reload.  See PR 22553 and the thread
         on gcc-patches
         <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
      else if (flag_exceptions)
        {
          if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
            warning (0, "ignoring -fschedule-insns because of exception "
                        "handling bug");
          flag_schedule_insns = 0;
        }
      else if (flag_schedule_insns
               && !global_options_set.x_flag_schedule_insns)
        flag_schedule_insns = 0;
    }
  /* Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
    {
      warning (0, "unwind tables currently require either a frame pointer "
               "or -maccumulate-outgoing-args for correctness");
      TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
    }

  /* Unwinding with -freorder-blocks-and-partition does not work on this
     architecture, because it requires far jumps to label crossing between
     hot/cold sections which are rejected on this architecture.  */
  if (flag_reorder_blocks_and_partition)
    {
      if (flag_exceptions)
        {
          inform (input_location,
                  "-freorder-blocks-and-partition does not work with "
                  "exceptions on this architecture");
          flag_reorder_blocks_and_partition = 0;
          flag_reorder_blocks = 1;
        }
      else if (flag_unwind_tables)
        {
          inform (input_location,
                  "-freorder-blocks-and-partition does not support unwind "
                  "info on this architecture");
          flag_reorder_blocks_and_partition = 0;
          flag_reorder_blocks = 1;
        }
    }
  /* Adjust loop, jump and function alignment values (in bytes), if those
     were not specified by the user using -falign-loops, -falign-jumps
     and -falign-functions options.
     32 bit alignment is better for speed, because instructions can be
     fetched as a pair from a longword boundary.  For size use 16 bit
     alignment to get more compact code.
     Aligning all jumps increases the code size, even if it might
     result in slightly faster code.  Thus, it is set to the smallest
     alignment possible if not specified by the user.  */
  if (align_loops == 0)
    {
      if (TARGET_SH5)
        align_loops = 8;
      else
        align_loops = optimize_size ? 2 : 4;
    }

  if (align_jumps == 0)
    {
      if (TARGET_SHMEDIA)
        align_jumps = 1 << CACHE_LOG;
      else
        align_jumps = 2;
    }
  else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
    align_jumps = TARGET_SHMEDIA ? 4 : 2;

  if (align_functions == 0)
    {
      if (TARGET_SHMEDIA)
        align_functions = optimize_size
                          ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
      else
        align_functions = optimize_size ? 2 : 4;
    }

  /* The linker relaxation code breaks when a function contains
     alignments that are larger than that at the start of a
     compilation unit.  */
  if (TARGET_RELAX)
    {
      int min_align = align_loops > align_jumps ? align_loops : align_jumps;

      /* Also take possible .long constants / mova tables into account.  */
      if (min_align < 4)
        min_align = 4;
      if (align_functions < min_align)
        align_functions = min_align;
    }
  if (flag_unsafe_math_optimizations)
    {
      /* Enable fsca insn for SH4A if not otherwise specified by the user.  */
      if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
        TARGET_FSCA = 1;

      /* Enable fsrra insn for SH4A if not otherwise specified by the user.  */
      if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
        TARGET_FSRRA = 1;
    }

  /* Allow fsrra insn only if -funsafe-math-optimizations and
     -ffinite-math-only is enabled.  */
  TARGET_FSRRA = TARGET_FSRRA
                 && flag_unsafe_math_optimizations
                 && flag_finite_math_only;

  /* If the -mieee option was not explicitly set by the user, turn it on
     unless -ffinite-math-only was specified.  See also PR 33135.  */
  if (! global_options_set.x_TARGET_IEEE)
    TARGET_IEEE = ! flag_finite_math_only;

  if (sh_fixed_range_str)
    sh_fix_range (sh_fixed_range_str);

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* Parse atomic model option and make sure it is valid for the current
     target CPU.  */
  selected_atomic_model_
    = parse_validate_atomic_model_option (sh_atomic_model_str);

  register_sh_passes ();
}
/* Print the operand address in x to the stream.  */
static void
sh_print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
        rtx base = XEXP (x, 0);
        rtx index = XEXP (x, 1);

        switch (GET_CODE (index))
          {
          case CONST_INT:
            fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
                     reg_names[true_regnum (base)]);
            break;

          case REG:
          case SUBREG:
            {
              int base_num = true_regnum (base);
              int index_num = true_regnum (index);

              fprintf (stream, "@(r0,%s)",
                       reg_names[MAX (base_num, index_num)]);
              break;
            }

          default:
            gcc_unreachable ();
          }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '\''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
        otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
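
/* For example, for a DFmode value in the register pair dr0 (fr0/fr1),
   '%S0' prints the MSW register fr0 and '%R0' prints the LSW register
   fr1; FP register pairs are always big endian, as noted in the 'R'
   handling below.  */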
static void
sh_print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
          && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
          && get_attr_length (XVECEXP (final_sequence, 0, 1)))
        fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
                                     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
        fprintf (stream, "trapa #%ld",
                 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
        {
          if (sh_cfun_resbank_handler_p ())
            fprintf (stream, "resbank\n");
          fprintf (stream, "rte");
        }
      else
        fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
        fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
        rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

        if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
          fputs ("/u", stream);
        break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
        {
          fputs ("\t! target: ", stream);
          output_addr_const (stream, JUMP_LABEL (current_output_insn));
        }
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
         For SHMEDIA, that means they can be used to access the first and
         second 32 bit part of a 64 bit (or larger) value that
         might be held in floating point registers or memory.
         While they can be used to access 64 bit parts of a larger value
         held in general purpose registers, that won't work with memory -
         neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
        {
          regno = true_regnum (x);
          regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
          fputs (reg_names[regno], (stream));
        }
      else if (MEM_P (x))
        {
          x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
          sh_print_operand_address (stream, XEXP (x, 0));
        }
      else
        {
          rtx sub = NULL_RTX;

          mode = GET_MODE (x);
          if (mode == VOIDmode)
            mode = DImode;
          if (GET_MODE_SIZE (mode) >= 8)
            sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
          if (sub)
            sh_print_operand (stream, sub, 0);
          else
            output_operand_lossage ("invalid operand to %%R");
        }
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
        {
          regno = true_regnum (x);
          regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
          fputs (reg_names[regno], (stream));
        }
      else if (MEM_P (x))
        {
          x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
          sh_print_operand_address (stream, XEXP (x, 0));
        }
      else
        {
          rtx sub = NULL_RTX;

          mode = GET_MODE (x);
          if (mode == VOIDmode)
            mode = DImode;
          if (GET_MODE_SIZE (mode) >= 8)
            sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
          if (sub)
            sh_print_operand (stream, sub, 0);
          else
            output_operand_lossage ("invalid operand to %%S");
        }
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
        {
        case REG:
          fputs (reg_names[REGNO (x) + 1], (stream));
          break;
        case MEM:
          if (GET_CODE (XEXP (x, 0)) != PRE_DEC
              && GET_CODE (XEXP (x, 0)) != POST_INC)
            x = adjust_address (x, SImode, 4);
          sh_print_operand_address (stream, XEXP (x, 0));
          break;
        default:
          break;
        }
      break;

    case 't':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      switch (GET_CODE (x))
        {
        case REG:
        case SUBREG:
          sh_print_operand (stream, x, 0);
          break;
        default:
          break;
        }
      break;

    case 'o':
      switch (GET_CODE (x))
        {
        case PLUS:  fputs ("add", stream); break;
        case MINUS: fputs ("sub", stream); break;
        case MULT:  fputs ("mul", stream); break;
        case DIV:   fputs ("div", stream); break;
        case EQ:    fputs ("eq",  stream); break;
        case NE:    fputs ("ne",  stream); break;
        case GT:  case LT:  fputs ("gt",  stream); break;
        case GE:  case LE:  fputs ("ge",  stream); break;
        case GTU: case LTU: fputs ("gtu", stream); break;
        case GEU: case LEU: fputs ("geu", stream); break;
        default:
          break;
        }
      break;

    case 'M':
      if (TARGET_SHMEDIA)
        {
          if (MEM_P (x)
              && GET_CODE (XEXP (x, 0)) == PLUS
              && (REG_P (XEXP (XEXP (x, 0), 1))
                  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
            fputc ('x', stream);
        }
      else
        {
          if (MEM_P (x))
            {
              switch (GET_MODE (x))
                {
                case QImode: fputs (".b", stream); break;
                case HImode: fputs (".w", stream); break;
                case SImode: fputs (".l", stream); break;
                case SFmode: fputs (".s", stream); break;
                case DFmode: fputs (".d", stream); break;
                default: gcc_unreachable ();
                }
            }
        }
      break;

    case 'm':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
        {
        case REG:
        case SUBREG:
          sh_print_operand (stream, x, 0);
          fputs (", 0", stream);
          break;

        case PLUS:
          sh_print_operand (stream, XEXP (x, 0), 0);
          fputs (", ", stream);
          sh_print_operand (stream, XEXP (x, 1), 0);
          break;

        default:
          gcc_unreachable ();
        }
      break;

    case 'V':
      {
        int num = exact_log2 (INTVAL (x));
        gcc_assert (num >= 0);
        fprintf (stream, "#%d", num);
      }
      break;

    case 'W':
      {
        int num = exact_log2 (~INTVAL (x));
        gcc_assert (num >= 0);
        fprintf (stream, "#%d", num);
      }
      break;

    case 'd':
      gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
        {
          fprintf ((stream), "r63");
          break;
        }
      goto default_output;
    case 'u':
      if (CONST_INT_P (x))
        {
          fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
          break;
        }
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
        {
        case TRUNCATE:
          {
            rtx inner = XEXP (x, 0);
            int offset = 0;
            enum machine_mode inner_mode;

            /* We might see SUBREGs with vector mode registers inside.  */
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && subreg_lowpart_p (inner))
              inner = SUBREG_REG (inner);
            if (CONST_INT_P (inner))
              {
                x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
                goto default_output;
              }
            inner_mode = GET_MODE (inner);
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && REG_P (SUBREG_REG (inner)))
              {
                offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
                                              GET_MODE (SUBREG_REG (inner)),
                                              SUBREG_BYTE (inner),
                                              GET_MODE (inner));
                inner = SUBREG_REG (inner);
              }
            if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
              abort ();
            /* Floating point register pairs are always big endian;
               general purpose registers are 64 bit wide.  */
            regno = REGNO (inner);
            regno = (HARD_REGNO_NREGS (regno, inner_mode)
                     - HARD_REGNO_NREGS (regno, mode))
                    + offset;
            x = inner;
            goto reg;
          }

        case SIGN_EXTEND:
          /* FIXME: We need this on SHmedia32 because reload generates
             some sign-extended HI or QI loads into DImode registers
             but, because Pmode is SImode, the address ends up with a
             subreg:SI of the DImode register.  Maybe reload should be
             fixed so as to apply alter_subreg to such loads?  */
        case IF_THEN_ELSE:
          gcc_assert (trapping_target_operand (x, VOIDmode));
          x = XEXP (XEXP (x, 2), 0);
          goto default_output;
        case SUBREG:
          gcc_assert (SUBREG_BYTE (x) == 0
                      && REG_P (SUBREG_REG (x)));

          x = SUBREG_REG (x);
          /* Fall through.  */

        reg:
        case REG:
          regno += REGNO (x);
          if (FP_REGISTER_P (regno)
              && mode == V16SFmode)
            fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
          else if (FP_REGISTER_P (REGNO (x))
                   && mode == V4SFmode)
            fprintf ((stream), "fv%s", reg_names[regno] + 2);
          else if (REG_P (x)
                   && mode == V2SFmode)
            fprintf ((stream), "fp%s", reg_names[regno] + 2);
          else if (FP_REGISTER_P (REGNO (x))
                   && GET_MODE_SIZE (mode) > 4)
            fprintf ((stream), "d%s", reg_names[regno] + 1);
          else
            fputs (reg_names[regno], (stream));
          break;

        case MEM:
          output_address (XEXP (x, 0));
          break;

        default:
          if (TARGET_SH1)
            fputc ('#', stream);
          output_addr_const (stream, x);
          break;
        }
      break;
    }
}
static bool
sh_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '.' || code == '#' || code == '@' || code == ','
          || code == '$' || code == '\'' || code == '>');
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
static bool
sh_asm_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    {
      switch (XINT (x, 1))
        {
        case UNSPEC_DATALABEL:
          fputs ("datalabel ", file);
          output_addr_const (file, XVECEXP (x, 0, 0));
          break;
        case UNSPEC_PIC:
          /* GLOBAL_OFFSET_TABLE or local symbols, no suffix.  */
          output_addr_const (file, XVECEXP (x, 0, 0));
          break;
        case UNSPEC_GOT:
          output_addr_const (file, XVECEXP (x, 0, 0));
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          output_addr_const (file, XVECEXP (x, 0, 0));
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_PLT:
          output_addr_const (file, XVECEXP (x, 0, 0));
          fputs ("@PLT", file);
          break;
        case UNSPEC_GOTPLT:
          output_addr_const (file, XVECEXP (x, 0, 0));
          fputs ("@GOTPLT", file);
          break;
        case UNSPEC_DTPOFF:
          output_addr_const (file, XVECEXP (x, 0, 0));
          fputs ("@DTPOFF", file);
          break;
        case UNSPEC_GOTTPOFF:
          output_addr_const (file, XVECEXP (x, 0, 0));
          fputs ("@GOTTPOFF", file);
          break;
        case UNSPEC_TPOFF:
          output_addr_const (file, XVECEXP (x, 0, 0));
          fputs ("@TPOFF", file);
          break;
        case UNSPEC_CALLER:
          {
            char name[32];
            /* LPCS stands for Label for PIC Call Site.  */
            targetm.asm_out.generate_internal_label (name, "LPCS",
                                                     INTVAL (XVECEXP (x, 0, 0)));
            assemble_name (file, name);
          }
          break;
        case UNSPEC_EXTRACT_S16:
        case UNSPEC_EXTRACT_U16:
          {
            rtx val, shift;

            val = XVECEXP (x, 0, 0);
            shift = XVECEXP (x, 0, 1);
            fputc ('(', file);
            if (shift != const0_rtx)
              fputc ('(', file);
            if (GET_CODE (val) == CONST
                || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
              {
                fputc ('(', file);
                output_addr_const (file, val);
                fputc (')', file);
              }
            else
              output_addr_const (file, val);
            if (shift != const0_rtx)
              {
                fputs (" >> ", file);
                output_addr_const (file, shift);
                fputc (')', file);
              }
            fputs (" & 65535)", file);
          }
          break;
        case UNSPEC_SYMOFF:
          output_addr_const (file, XVECEXP (x, 0, 0));
          fputc ('-', file);
          if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
            {
              fputc ('(', file);
              output_addr_const (file, XVECEXP (x, 0, 1));
              fputc (')', file);
            }
          else
            output_addr_const (file, XVECEXP (x, 0, 1));
          break;
        case UNSPEC_PCREL_SYMOFF:
          output_addr_const (file, XVECEXP (x, 0, 0));
          fputs ("-(", file);
          output_addr_const (file, XVECEXP (x, 0, 1));
          fputs ("-.)", file);
          break;
        default:
          return false;
        }
      return true;
    }
  else
    return false;
}
/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}
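
/* E.g. an SH2A function declared with __attribute__ ((function_vector (4)))
   has SYMBOL_FLAG_FUNCVEC_FUNCTION set on its SYMBOL_REF here, so later
   passes can recognize such functions from the symbol alone.  */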
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */
void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
            && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
        {
          if (MEM_P (operands[0]))
            operands[1] = force_reg (Pmode, operands[1]);
          else if (TARGET_SHMEDIA
                   && GET_CODE (operands[1]) == LABEL_REF
                   && target_reg_operand (operands[0], mode))
            /* It's ok.  */;
          else
            {
              temp = (!can_create_pseudo_p ()
                      ? operands[0]
                      : gen_reg_rtx (Pmode));
              operands[1] = legitimize_pic_address (operands[1], mode, temp);
            }
        }
      else if (GET_CODE (operands[1]) == CONST
               && GET_CODE (XEXP (operands[1], 0)) == PLUS
               && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
        {
          temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
                                         mode, temp);
          operands[1] = expand_binop (mode, add_optab, temp,
                                      XEXP (XEXP (operands[1], 0), 1),
                                      (!can_create_pseudo_p ()
                                       ? temp
                                       : gen_reg_rtx (Pmode)),
                                      0, OPTAB_LIB_WIDEN);
        }
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
          && ! sh_register_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
        {
          /* This is like change_address_1 (operands[0], mode, 0, 1),
             except that we can't use that function because it is static.  */
          rtx new_rtx = change_address (operands[0], mode, 0);
          MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
          operands[0] = new_rtx;
        }

      /* This case can happen while generating code to move the result
         of a library call to the target.  Reject `st r0,@(rX,rY)' because
         reload will fail to find a spill register for rX, since r0 is already
         being used for the source.  */
      else if (TARGET_SH1
               && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
               && MEM_P (operands[0])
               && GET_CODE (XEXP (operands[0], 0)) == PLUS
               && REG_P (XEXP (XEXP (operands[0], 0), 1)))
        operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
              != TLS_MODEL_NONE))
        {
          opc = XEXP (XEXP (op1, 0), 1);
          op1 = XEXP (XEXP (op1, 0), 0);
        }
      else
        opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
        {
          rtx tga_op1, tga_ret, tmp, tmp2;

          if (! flag_pic
              && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
                  || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
                  || tls_kind == TLS_MODEL_INITIAL_EXEC))
            {
              /* Don't schedule insns for getting GOT address when
                 the first scheduling is enabled, to avoid spill
                 failures.  */
              if (flag_schedule_insns)
                emit_insn (gen_blockage ());
              emit_insn (gen_GOTaddr2picreg ());
              emit_use (gen_rtx_REG (SImode, PIC_REG));
              if (flag_schedule_insns)
                emit_insn (gen_blockage ());
            }

          switch (tls_kind)
            {
            case TLS_MODEL_GLOBAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
              tmp = gen_reg_rtx (Pmode);
              emit_move_insn (tmp, tga_ret);
              op1 = tmp;
              break;

            case TLS_MODEL_LOCAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

              tmp = gen_reg_rtx (Pmode);
              emit_move_insn (tmp, tga_ret);

              if (register_operand (op0, Pmode))
                tmp2 = op0;
              else
                tmp2 = gen_reg_rtx (Pmode);

              emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
              op1 = tmp2;
              break;

            case TLS_MODEL_INITIAL_EXEC:
              tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
              tmp = gen_sym2GOTTPOFF (op1);
              emit_insn (gen_tls_initial_exec (tga_op1, tmp));
              op1 = tga_op1;
              break;

            case TLS_MODEL_LOCAL_EXEC:
              tmp2 = gen_reg_rtx (Pmode);
              emit_insn (gen_store_gbr (tmp2));
              tmp = gen_reg_rtx (Pmode);
              emit_insn (gen_symTPOFF2reg (tmp, op1));

              if (register_operand (op0, Pmode))
                op1 = op0;
              else
                op1 = gen_reg_rtx (Pmode);

              emit_insn (gen_addsi3 (op1, tmp, tmp2));
              break;

            default:
              gcc_unreachable ();
            }
          if (opc)
            emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
          operands[1] = op1;
        }
    }
}
1786 /* Implement the canonicalize_comparison target hook for the combine
1787 pass. For the target hook this function is invoked via
1788 sh_canonicalize_comparison. This function is also re-used to
1789 canonicalize comparisons in cbranch pattern expanders. */
1791 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1792 enum machine_mode mode,
1793 bool op0_preserve_value)
1795 /* When invoked from within the combine pass the mode is not specified,
1796 so try to get it from one of the operands. */
1797 if (mode == VOIDmode)
1798 mode = GET_MODE (op0);
1799 if (mode == VOIDmode)
1800 mode = GET_MODE (op1);
1802 // We need to have a mode to do something useful here.
1803 if (mode == VOIDmode)
1806 // Currently, we don't deal with floats here.
1807 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1810 // Make sure that the constant operand is the second operand.
1811 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1813 if (op0_preserve_value)
1816 std::swap (op0, op1);
1817 cmp = swap_condition (cmp);
1820 if (CONST_INT_P (op1))
1822 /* Try to adjust the constant operand in such a way that available
1823 comparison insns can be utilized better and the constant can be
1824 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1826 const HOST_WIDE_INT val = INTVAL (op1);
1828 /* x > -1 --> x >= 0
1829 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1831 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1832 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1834 cmp = cmp == GT ? GE : LT;
1835 op1 = gen_int_mode (val + 1, mode);
1839 x >= 0x80 --> x > 0x7F
1841 x < 0x80 --> x <= 0x7F */
1842 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1844 cmp = cmp == GE ? GT : LE;
1845 op1 = gen_int_mode (val - 1, mode);
1848 /* unsigned x >= 1 --> x != 0
1849 unsigned x < 1 --> x == 0 */
1850 else if (val == 1 && (cmp == GEU || cmp == LTU))
1852 cmp = cmp == GEU ? NE : EQ;
1853 op1 = CONST0_RTX (mode);
1856 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1857 unsigned x < 0x80 --> unsigned x < 0x7F */
1858 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1860 cmp = cmp == GEU ? GTU : LEU;
1861 op1 = gen_int_mode (val - 1, mode);
1864 /* unsigned x > 0 --> x != 0
1865 unsigned x <= 0 --> x == 0 */
1866 else if (val == 0 && (cmp == GTU || cmp == LEU))
1867 cmp = cmp == GTU ? NE : EQ;
1869 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1870 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1871 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1872 && val == 0x7FFFFFFF)
1874 cmp = cmp == GTU ? LT : GE;
1878 /* unsigned x >= 0x80000000 --> signed x < 0
1879 unsigned x < 0x80000000 --> signed x >= 0 */
1880 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1881 && (unsigned HOST_WIDE_INT)val
1882 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1884 cmp = cmp == GEU ? LT : GE;
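/* Illustrative example (added; not part of the original source): assuming
   SImode, the last two rules turn  (gtu x 0x7FFFFFFF)  into  (lt x 0)  and
   (geu x 0x80000000)  into  (lt x 0),  so a sign test such as cmp/pz plus
   an inverted branch replaces loading a large constant from the constant
   pool and issuing cmp/hi or cmp/hs.  */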
1890 /* This function implements the canonicalize_comparison target hook.
1891 This wrapper around the internally used sh_canonicalize_comparison
1892 function is needed to do the enum rtx_code <-> int conversion.
1893 Target hooks cannot use enum rtx_code in their definitions. */
1895 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1896 bool op0_preserve_value)
1898 enum rtx_code tmp_code = (enum rtx_code)*code;
1899 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1900 VOIDmode, op0_preserve_value);
1901 *code = (int)tmp_code;
1905 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1908 *p2 = INVALID_REGNUM;
1913 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1914 enum rtx_code comparison)
1916 /* The scratch reg is only available when this is invoked from within
1917 the cbranchdi4_i splitter, through expand_cbranchdi4. */
1918 rtx scratch = NULL_RTX;
1920 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1921 comparison = GET_CODE (operands[0]);
1923 scratch = operands[4];
1925 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1928 /* Notice that this function is also invoked after reload by
1929 the cbranchdi4_i pattern, through expand_cbranchdi4. */
1930 rtx op1 = operands[1];
1932 if (can_create_pseudo_p ())
1933 operands[1] = force_reg (mode, op1);
1934 /* When we are handling DImode comparisons, we want to keep constants so
1935 that we can optimize the component comparisons; however, memory loads
1936 are better issued as a whole so that they can be scheduled well.
1937 SImode equality comparisons allow I08 constants, but only when they
1938 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1939 into a register, that register might as well be r0, and we allow the
1940 constant. If it is already in a register, this is likely to be
1941 allocated to a different hard register, thus we load the constant into
1942 a register unless it is zero. */
1943 if (!REG_P (operands[2])
1944 && (!CONST_INT_P (operands[2])
1945 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1946 && ((comparison != EQ && comparison != NE)
1947 || (REG_P (op1) && REGNO (op1) != R0_REG)
1948 || !satisfies_constraint_I08 (operands[2])))))
1950 if (scratch && GET_MODE (scratch) == mode)
1952 emit_move_insn (scratch, operands[2]);
1953 operands[2] = scratch;
1955 else if (can_create_pseudo_p ())
1956 operands[2] = force_reg (mode, operands[2]);
1962 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1964 rtx (*branch_expander) (rtx) = gen_branch_true;
1965 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1968 case NE: case LT: case LE: case LTU: case LEU:
1969 comparison = reverse_condition (comparison);
1970 branch_expander = gen_branch_false;
1973 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
1974 gen_rtx_fmt_ee (comparison, SImode,
1975 operands[1], operands[2])));
1976 rtx jump = emit_jump_insn (branch_expander (operands[3]));
1977 if (probability >= 0)
1978 add_int_reg_note (jump, REG_BR_PROB, probability);
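/* Example of the result (added for illustration; register assignment is
   assumed): for 'a < b' with a in r4 and b in r5, LT is reversed to GE,
   so the expander emits

     cmp/ge  r5,r4    ! T = (a >= b)
     bf      .Ltarget ! branch taken when a < b  */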
1981 /* ??? How should we distribute probabilities when more than one branch
1982 is generated? So far we only have some ad-hoc observations:
1983 - If the operands are random, they are likely to differ in both parts.
1984 - If comparing items in a hash chain, the operands are random or equal;
1985 operation should be EQ or NE.
1986 - If items are searched in an ordered tree from the root, we can expect
1987 the highpart to be unequal about half of the time; operation should be
1988 an inequality comparison, operands non-constant, and overall probability
1989 about 50%. Likewise for quicksort.
1990 - Range checks will often be made against constants. Even if we assume for
1991 simplicity an even distribution of the non-constant operand over a
1992 sub-range here, the same probability could be generated with differently
1993 wide sub-ranges - as long as the ratio of the part of the subrange that
1994 is before the threshold to the part that comes after the threshold stays
1995 the same. Thus, we can't really tell anything here;
1996 assuming random distribution is at least simple.
1999 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2001 enum rtx_code msw_taken, msw_skip, lsw_taken;
2002 rtx skip_label = NULL_RTX;
2003 rtx op1h, op1l, op2h, op2l;
2006 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2007 rtx scratch = operands[4];
2009 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2010 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2011 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2012 op1l = gen_lowpart (SImode, operands[1]);
2013 op2l = gen_lowpart (SImode, operands[2]);
2014 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2015 prob = split_branch_probability;
2016 rev_prob = REG_BR_PROB_BASE - prob;
2019 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2020 That costs 1 cycle more when the first branch can be predicted taken,
2021 but saves us mispredicts because only one branch needs prediction.
2022 It also enables generating the cmpeqdi_t-1 pattern. */
2024 if (TARGET_CMPEQDI_T)
2026 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2027 emit_jump_insn (gen_branch_true (operands[3]));
2034 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2035 msw_skip_prob = rev_prob;
2036 if (REG_BR_PROB_BASE <= 65535)
2037 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2040 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
2044 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
2045 / ((HOST_WIDEST_INT) prob << 32)))
2051 if (TARGET_CMPEQDI_T)
2053 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2054 emit_jump_insn (gen_branch_false (operands[3]));
2058 msw_taken_prob = prob;
2063 msw_taken = comparison;
2064 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2066 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2067 msw_skip = swap_condition (msw_taken);
2071 if (op2l == CONST0_RTX (SImode))
2072 msw_taken = comparison;
2075 msw_taken = comparison == GE ? GT : GTU;
2076 msw_skip = swap_condition (msw_taken);
2081 msw_taken = comparison;
2082 if (op2l == CONST0_RTX (SImode))
2084 msw_skip = swap_condition (msw_taken);
2088 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2089 msw_taken = comparison;
2093 if (comparison == LE)
2095 else if (op2h != CONST0_RTX (SImode))
2099 msw_skip = swap_condition (LTU);
2102 msw_skip = swap_condition (msw_taken);
2105 default: return false;
2107 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2108 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2109 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2110 if (comparison != EQ && comparison != NE && num_branches > 1)
2112 if (!CONSTANT_P (operands[2])
2113 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2114 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2116 msw_taken_prob = prob / 2U;
2118 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2119 lsw_taken_prob = prob;
2123 msw_taken_prob = prob;
2124 msw_skip_prob = REG_BR_PROB_BASE;
2125 /* ??? If we have a constant op2h, should we use that when
2126 calculating lsw_taken_prob? */
2127 lsw_taken_prob = prob;
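/* Worked example (added; REG_BR_PROB_BASE is 10000): a non-constant
   comparison with prob == 5000 falls into the 3/8 .. 5/8 window above,
   giving msw_taken_prob = 2500, msw_skip_prob = 10000 * 5000 / 15000
   == 3333 and lsw_taken_prob = 5000.  */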
2132 operands[4] = NULL_RTX;
2133 if (reload_completed
2134 && ! arith_reg_or_0_operand (op2h, SImode)
2135 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2136 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2137 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2139 emit_move_insn (scratch, operands[2]);
2140 operands[2] = scratch;
2142 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2143 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2144 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2146 rtx taken_label = operands[3];
2148 /* Operands were possibly modified, but msw_skip doesn't expect this.
2149 Always use the original ones. */
2150 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2154 if (reload_completed
2155 && ! arith_reg_or_0_operand (op2h, SImode)
2156 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2158 emit_move_insn (scratch, operands[2]);
2159 operands[2] = scratch;
2163 operands[3] = skip_label = gen_label_rtx ();
2164 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2165 operands[3] = taken_label;
2169 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2171 if (reload_completed
2172 && ! arith_reg_or_0_operand (op2l, SImode)
2173 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2175 emit_move_insn (scratch, operands[2]);
2176 operands[2] = scratch;
2178 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2180 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2181 emit_label (skip_label);
2185 /* Given an operand, return 1 if the evaluated operand plugged into an
2186 if_then_else will result in a branch_true, 0 if branch_false, or
2187 -1 if neither applies. The truth table goes like this:
2189 op | cmpval | code | result
2190 ---------+--------+---------+--------------------
2191 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2192 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2193 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2194 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2195 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2196 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2197 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2198 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2200 sh_eval_treg_value (rtx op)
2202 enum rtx_code code = GET_CODE (op);
2203 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2206 int cmpop = code == EQ ? 1 : 0;
2207 int cmpval = INTVAL (XEXP (op, 1));
2208 if (cmpval != 0 && cmpval != 1)
2212 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2214 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2219 return t ^ (cmpval == cmpop);
2222 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2225 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2227 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2229 insn = gen_rtx_PARALLEL (VOIDmode,
2231 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2232 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2238 /* Prepare the operands for an scc instruction; make sure that the
2239 compare has been done and the result is in T_REG. */
2241 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2243 rtx t_reg = get_t_reg_rtx ();
2244 enum rtx_code oldcode = code;
2245 enum machine_mode mode;
2247 /* First need a compare insn. */
2251 /* It isn't possible to handle this case. */
2268 if (code != oldcode)
2275 mode = GET_MODE (op0);
2276 if (mode == VOIDmode)
2277 mode = GET_MODE (op1);
2279 op0 = force_reg (mode, op0);
2280 if ((code != EQ && code != NE
2281 && (op1 != const0_rtx
2282 || code == GTU || code == GEU || code == LTU || code == LEU))
2283 || (mode == DImode && op1 != const0_rtx)
2284 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2285 op1 = force_reg (mode, op1);
2287 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2288 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2293 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2296 rtx target = gen_reg_rtx (SImode);
2299 gcc_assert (TARGET_SHMEDIA);
2308 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2309 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2319 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2320 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2338 rtx t2 = gen_reg_rtx (DImode);
2339 emit_insn (gen_extendsidi2 (t2, target));
2343 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2346 /* Called from the md file, set up the operands of a compare instruction. */
2348 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2350 enum rtx_code code = GET_CODE (operands[0]);
2351 enum rtx_code branch_code;
2352 rtx op0 = operands[1];
2353 rtx op1 = operands[2];
2355 bool need_ccmpeq = false;
2357 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2359 op0 = force_reg (mode, op0);
2360 op1 = force_reg (mode, op1);
2364 if (code != EQ || mode == DImode)
2366 /* Force args into regs, since we can't use constants here. */
2367 op0 = force_reg (mode, op0);
2368 if (op1 != const0_rtx || code == GTU || code == GEU)
2369 op1 = force_reg (mode, op1);
2373 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2376 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2377 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2379 tem = op0, op0 = op1, op1 = tem;
2380 code = swap_condition (code);
2383 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2386 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2391 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2392 to EQ/GT respectively. */
2393 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2410 branch_code = reverse_condition (code);
2416 insn = gen_rtx_SET (VOIDmode,
2418 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2420 sh_emit_set_t_insn (insn, mode);
2422 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2424 if (branch_code == code)
2425 emit_jump_insn (gen_branch_true (operands[3]));
2427 emit_jump_insn (gen_branch_false (operands[3]));
2431 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2433 enum rtx_code code = GET_CODE (operands[1]);
2434 rtx op0 = operands[2];
2435 rtx op1 = operands[3];
2437 bool invert = false;
2440 op0 = force_reg (mode, op0);
2441 if ((code != EQ && code != NE
2442 && (op1 != const0_rtx
2443 || code == GTU || code == GEU || code == LTU || code == LEU))
2444 || (mode == DImode && op1 != const0_rtx)
2445 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2446 op1 = force_reg (mode, op1);
2448 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2450 if (code == LT || code == LE)
2452 code = swap_condition (code);
2453 tem = op0, op0 = op1, op1 = tem;
2459 lab = gen_label_rtx ();
2460 sh_emit_scc_to_t (EQ, op0, op1);
2461 emit_jump_insn (gen_branch_true (lab));
2478 sh_emit_scc_to_t (code, op0, op1);
2482 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2484 emit_move_insn (operands[0], get_t_reg_rtx ());
2487 /* Functions to output assembly code. */
2489 /* Return a sequence of instructions to perform DI or DF move.
2491 Since the SH cannot move a DI or DF in one instruction, we have
2492 to take care when we see overlapping source and dest registers. */
2494 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2495 enum machine_mode mode)
2497 rtx dst = operands[0];
2498 rtx src = operands[1];
2501 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2502 return "mov.l %T1,%0" "\n"
2505 if (register_operand (dst, mode)
2506 && register_operand (src, mode))
2508 if (REGNO (src) == MACH_REG)
2509 return "sts mach,%S0" "\n"
2512 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2513 when mov.d r1,r0 do r1->r0 then r2->r1. */
2514 if (REGNO (src) + 1 == REGNO (dst))
2515 return "mov %T1,%T0" "\n"
2518 return "mov %1,%0" "\n"
2521 else if (CONST_INT_P (src))
2523 if (INTVAL (src) < 0)
2524 output_asm_insn ("mov #-1,%S0", operands);
2526 output_asm_insn ("mov #0,%S0", operands);
2528 return "mov %1,%R0";
2530 else if (MEM_P (src))
2533 int dreg = REGNO (dst);
2534 rtx inside = XEXP (src, 0);
2536 switch (GET_CODE (inside))
2539 ptrreg = REGNO (inside);
2543 ptrreg = subreg_regno (inside);
2547 ptrreg = REGNO (XEXP (inside, 0));
2548 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2549 an offsettable address. Unfortunately, offsettable addresses use
2550 QImode to check the offset, and a QImode offsettable address
2551 requires r0 for the other operand, which is not currently
2552 supported, so we can't use the 'o' constraint.
2553 Thus we must check for and handle r0+REG addresses here.
2554 We punt for now, since this is likely very rare. */
2555 gcc_assert (!REG_P (XEXP (inside, 1)));
2559 return "mov.l %1,%0" "\n"
2562 return "mov.l %1,%0" "\n"
2568 /* Work out the safe way to copy. Copy into the second half first. */
2570 return "mov.l %T1,%T0" "\n"
2574 return "mov.l %1,%0" "\n"
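/* Example (added for illustration): a DImode load from @(8,r4) into the
   pair r4/r5 overlaps the pointer register, so the second word is copied
   first:

     mov.l  @(12,r4),r5   ! r4 is still a valid pointer here
     mov.l  @(8,r4),r4    ! the pointer is clobbered last  */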
2578 /* Print an instruction which would have gone into a delay slot after
2579 another instruction, but couldn't because the other instruction expanded
2580 into a sequence where putting the slot insn at the end wouldn't work. */
2582 print_slot (rtx insn)
2584 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2586 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2590 output_far_jump (rtx insn, rtx op)
2592 struct { rtx lab, reg, op; } this_jmp;
2593 rtx braf_base_lab = NULL_RTX;
2596 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2599 this_jmp.lab = gen_label_rtx ();
2603 && offset - get_attr_length (insn) <= 32766)
2606 jump = "mov.w %O0,%1" "\n"
2615 jump = "mov.l %O0,%1" "\n"
2618 jump = "mov.l r0,@-r15" "\n"
2620 " mov.l @r0,%1" "\n"
2622 " mov.l @r15+,r0" "\n"
2626 jump = "mov.l %O0,%1" "\n"
2629 /* If we have a scratch register available, use it. */
2630 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2631 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2633 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2634 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2635 jump = "mov.l r1,@-r15" "\n"
2637 " mov.l @r0,r1" "\n"
2639 " mov.l @r15+,r1" "\n"
2641 output_asm_insn (jump, &this_jmp.lab);
2642 if (dbr_sequence_length ())
2643 print_slot (final_sequence);
2645 output_asm_insn ("nop", 0);
2649 /* Output the delay slot insn first if any. */
2650 if (dbr_sequence_length ())
2651 print_slot (final_sequence);
2653 this_jmp.reg = gen_rtx_REG (SImode, 13);
2654 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2655 Fortunately, MACL is fixed and call-clobbered, and we never
2656 need its value across jumps, so save r13 in it instead of in the stack. */
2659 output_asm_insn ("lds r13,macl", 0);
2661 output_asm_insn ("mov.l r13,@-r15", 0);
2662 output_asm_insn (jump, &this_jmp.lab);
2664 output_asm_insn ("sts macl,r13", 0);
2666 output_asm_insn ("mov.l @r15+,r13", 0);
2668 if (far && flag_pic && TARGET_SH2)
2670 braf_base_lab = gen_label_rtx ();
2671 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2672 CODE_LABEL_NUMBER (braf_base_lab));
2675 output_asm_insn (".align 2", 0);
2676 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2678 if (far && flag_pic)
2681 this_jmp.lab = braf_base_lab;
2682 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2685 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
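/* Sketch of a typical non-PIC far jump as assembled from the pieces above
   (added; label names are placeholders):

     mov.l   .L1,r1     ! r1 found via indirect_jump_scratch
     jmp     @r1
      nop               ! or the queued delay slot insn
     .align  2
   .L1:
     .long   target  */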
2689 /* Local label counter, used for constants in the pool and inside
2690 pattern branches. */
2691 static int lf = 100;
2693 /* Output code for ordinary branches. */
2695 output_branch (int logic, rtx insn, rtx *operands)
2697 switch (get_attr_length (insn))
2700 /* This can happen if filling the delay slot has caused a forward
2701 branch to exceed its range (we could reverse it, but only
2702 when we know we won't overextend other branches; this should
2703 best be handled by relaxation).
2704 It can also happen when other condbranches hoist delay slot insn
2705 from their destination, thus leading to code size increase.
2706 But the branch will still be in the range -4092..+4098 bytes. */
2710 /* The call to print_slot will clobber the operands. */
2711 rtx op0 = operands[0];
2713 /* If the instruction in the delay slot is annulled (true), then
2714 there is no delay slot where we can put it now. The only safe
2715 place for it is after the label. final will do that by default. */
2718 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2719 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2721 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2722 ASSEMBLER_DIALECT ? "/" : ".", label);
2723 print_slot (final_sequence);
2726 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2728 output_asm_insn ("bra\t%l0", &op0);
2729 fprintf (asm_out_file, "\tnop\n");
2730 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
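/* For illustration (added): a branch_true whose target is out of bt/bf
   range is thus emitted as an inverted branch around an unconditional one:

     bf      .LF100
     bra     .Ltarget
      nop
   .LF100:  */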
2734 /* When relaxing, handle this like a short branch. The linker
2735 will fix it up if it still doesn't fit after relaxation. */
2737 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2739 /* These are for SH2e, in which we have to account for the
2740 extra nop because of the hardware bug in annulled branches. */
2746 gcc_assert (!final_sequence
2747 || !(INSN_ANNULLED_BRANCH_P
2748 (XVECEXP (final_sequence, 0, 0))));
2749 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2751 ASSEMBLER_DIALECT ? "/" : ".", label);
2752 fprintf (asm_out_file, "\tnop\n");
2753 output_asm_insn ("bra\t%l0", operands);
2754 fprintf (asm_out_file, "\tnop\n");
2755 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2759 /* When relaxing, fall through. */
2764 sprintf (buffer, "b%s%ss\t%%l0",
2766 ASSEMBLER_DIALECT ? "/" : ".");
2767 output_asm_insn (buffer, &operands[0]);
2772 /* There should be no longer branches now - that would
2773 indicate that something has destroyed the branches set
2774 up in machine_dependent_reorg. */
2779 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2780 fill in operand 9 as a label to the successor insn.
2781 We try to use jump threading where possible.
2782 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2783 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2784 follow jmp and bt, if the address is in range. */
2786 output_branchy_insn (enum rtx_code code, const char *templ,
2787 rtx insn, rtx *operands)
2789 rtx next_insn = NEXT_INSN (insn);
2791 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2793 rtx src = SET_SRC (PATTERN (next_insn));
2794 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2796 /* Following branch not taken */
2797 operands[9] = gen_label_rtx ();
2798 emit_label_after (operands[9], next_insn);
2799 INSN_ADDRESSES_NEW (operands[9],
2800 INSN_ADDRESSES (INSN_UID (next_insn))
2801 + get_attr_length (next_insn));
2806 int offset = (branch_dest (next_insn)
2807 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2808 if (offset >= -252 && offset <= 258)
2810 if (GET_CODE (src) == IF_THEN_ELSE)
2812 src = XEXP (src, 1);
2818 operands[9] = gen_label_rtx ();
2819 emit_label_after (operands[9], insn);
2820 INSN_ADDRESSES_NEW (operands[9],
2821 INSN_ADDRESSES (INSN_UID (insn))
2822 + get_attr_length (insn));
2827 output_ieee_ccmpeq (rtx insn, rtx *operands)
2829 return output_branchy_insn (NE, "bt %l9" "\n"
2834 /* Output the start of the assembler file. */
2836 sh_file_start (void)
2838 default_file_start ();
2841 /* We need to show the text section with the proper
2842 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2843 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2844 will complain. We can teach GAS specifically about the
2845 default attributes for our choice of text section, but
2846 then we would have to change GAS again if/when we change
2847 the text section name. */
2848 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2850 /* Switch to the data section so that the coffsem symbol
2851 isn't in the text section. */
2852 switch_to_section (data_section);
2854 if (TARGET_LITTLE_ENDIAN)
2855 fputs ("\t.little\n", asm_out_file);
2859 if (TARGET_SHCOMPACT)
2860 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2861 else if (TARGET_SHMEDIA)
2862 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2863 TARGET_SHMEDIA64 ? 64 : 32);
2867 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2869 unspec_caller_rtx_p (rtx pat)
2874 split_const (pat, &base, &offset);
2875 if (GET_CODE (base) == UNSPEC)
2877 if (XINT (base, 1) == UNSPEC_CALLER)
2879 for (i = 0; i < XVECLEN (base, 0); i++)
2880 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2886 /* Indicate that INSN cannot be duplicated. This is true for insns
2887 that generate a unique label. */
2889 sh_cannot_copy_insn_p (rtx insn)
2893 if (!reload_completed || !flag_pic)
2896 if (!NONJUMP_INSN_P (insn))
2898 if (asm_noperands (insn) >= 0)
2901 pat = PATTERN (insn);
2902 if (GET_CODE (pat) != SET)
2904 pat = SET_SRC (pat);
2906 if (unspec_caller_rtx_p (pat))
2912 /* Number of instructions used to make an arithmetic right shift by N. */
2913 static const char ashiftrt_insns[] =
2914 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2916 /* Description of a logical left or right shift, when expanded to a sequence of 1/2/8/16 shifts.
2918 Notice that one bit right shifts clobber the T bit. One bit left shifts
2919 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2922 ASHL_CLOBBERS_T = 1 << 0,
2923 LSHR_CLOBBERS_T = 1 << 1
2926 struct ashl_lshr_sequence
2933 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2935 { 0, { 0 }, 0 }, // 0
2936 { 1, { 1 }, LSHR_CLOBBERS_T },
2938 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2939 { 2, { 2, 2 }, 0 }, // 4
2940 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2941 { 3, { 2, 2, 2 }, 0 },
2942 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2943 { 1, { 8 }, 0 }, // 8
2944 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2946 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2947 { 3, { 8, 2, 2 }, 0 }, // 12
2948 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2949 { 3, { 8, -2, 8 }, 0 },
2950 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2951 { 1, { 16 }, 0 }, // 16
2952 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2953 { 2, { 16, 2 }, 0 },
2954 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2955 { 3, { 16, 2, 2 }, 0 }, // 20
2956 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2957 { 3, { 16, -2, 8 }, 0 },
2958 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2959 { 2, { 16, 8 }, 0 }, // 24
2960 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2961 { 3, { 16, 8, 2 }, 0 },
2962 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2963 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2964 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2965 { 3, { 16, -2, 16 }, 0 },
2967 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2968 For a left shift by 31 a 2 insn and-rotl sequence can be used.
2969 However, the shift-and combiner code needs this entry here to be in
2970 terms of real shift insns. */
2971 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
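/* Reading example (added): a left shift by 20 uses the { 3, { 16, 2, 2 }, 0 }
   entry above, i.e. shll16, shll2, shll2; three insns that leave T alone.  */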
2974 /* Individual shift amounts for shift amounts < 16, where up to three of the
2975 highmost bits might be clobbered. This is typically used when combined with some
2976 kind of sign or zero extension. */
2977 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2979 { 0, { 0 }, 0 }, // 0
2980 { 1, { 1 }, LSHR_CLOBBERS_T },
2982 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2983 { 2, { 2, 2 }, 0 }, // 4
2984 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2985 { 2, { 8, -2 }, 0 },
2986 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2987 { 1, { 8 }, 0 }, // 8
2988 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2990 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2991 { 3, { 8, 2, 2 }, 0 }, // 12
2992 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2993 { 2, { 16, -2 }, 0 },
2994 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2995 { 1, { 16 }, 0 }, // 16
2996 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2997 { 2, { 16, 2 }, 0 },
2998 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2999 { 3, { 16, 2, 2 }, 0 }, // 20
3000 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3001 { 3, { 16, -2, 8 }, 0 },
3002 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3003 { 2, { 16, 8 }, 0 }, // 24
3004 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3005 { 3, { 16, 8, 2 }, 0 },
3006 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3007 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3008 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3009 { 3, { 16, -2, 16 }, 0 },
3010 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3013 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3014 will clobber the T bit. */
3016 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3018 gcc_assert (CONST_INT_P (shift_amount));
3020 const int shift_amount_i = INTVAL (shift_amount) & 31;
3022 /* Special case for shift count of 31: use and-rotl sequence. */
3023 if (shift_amount_i == 31)
3026 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3027 & ASHL_CLOBBERS_T) != 0;
3030 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3031 instructions will clobber the T bit. */
3033 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3035 gcc_assert (CONST_INT_P (shift_amount));
3037 const int shift_amount_i = INTVAL (shift_amount) & 31;
3039 /* Special case for shift count of 31: use shll-movt sequence. */
3040 if (shift_amount_i == 31)
3043 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3044 & LSHR_CLOBBERS_T) != 0;
3047 /* Return true if it is potentially beneficial to use a dynamic shift
3048 instruction (shad / shar) instead of a combination of 1/2/8/16
3049 shift instructions for the specified shift count.
3050 If dynamic shifts are not available, always return false. */
3052 sh_dynamicalize_shift_p (rtx count)
3054 gcc_assert (CONST_INT_P (count));
3056 const int shift_amount_i = INTVAL (count) & 31;
3059 /* For left and right shifts, there are shorter 2 insn sequences for
3060 shift amounts of 31. */
3061 if (shift_amount_i == 31)
3064 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3066 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
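/* Worked example (added; assumes SH_DYNAMIC_SHIFT_COST is 1 on targets
   with dynamic shifts): a shift by 5 needs 3 insns via { 2, 1, 2 }, so
   3 > 1 + 1 holds and loading the count with 'mov #5,rX' for a dynamic
   shift is preferred.  */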
3069 /* Assuming we have a value that has been sign-extended by at least one bit,
3070 can we use the ext_shift_amounts with the last shift turned to an
3071 arithmetic shift to shift it by N without data loss, and quicker than by other means? */
3073 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3075 /* Return the cost of a shift. */
3084 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3086 if (GET_MODE (x) == DImode
3087 && CONST_INT_P (XEXP (x, 1))
3088 && INTVAL (XEXP (x, 1)) == 1)
3091 /* Everything else is invalid, because there is no pattern for it. */
3094 /* If shifting by a non-constant, then this will be expensive. */
3095 if (!CONST_INT_P (XEXP (x, 1)))
3096 return SH_DYNAMIC_SHIFT_COST;
3098 /* Otherwise, return the true cost in instructions. Cope with out of range
3099 shift counts more or less arbitrarily. */
3100 value = INTVAL (XEXP (x, 1)) & 31;
3102 if (GET_CODE (x) == ASHIFTRT)
3104 int cost = ashiftrt_insns[value];
3105 /* If dynamic shifts are available and profitable in this case, then we
3106 put the constant in a reg and use shad. */
3107 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3108 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3112 return ashl_lshr_seq[value].insn_count;
3115 /* Return the cost of an AND/XOR/IOR operation. */
3117 and_xor_ior_costs (rtx x, int code)
3119 /* On SH1-4 we have only max. SImode operations.
3120 Double the cost for modes > SImode. */
3121 const int cost_scale = !TARGET_SHMEDIA
3122 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3125 /* A logical operation with two registers is a single cycle insn. */
3127 if (!CONST_INT_P (XEXP (x, 1)))
3128 return 1 * cost_scale;
3130 int i = INTVAL (XEXP (x, 1));
3134 if (satisfies_constraint_I10 (XEXP (x, 1))
3135 || satisfies_constraint_J16 (XEXP (x, 1)))
3138 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3141 /* These constants are single cycle extu.[bw] instructions. */
3142 if ((i == 0xff || i == 0xffff) && code == AND)
3143 return 1 * cost_scale;
3144 /* Constants that can be used in an instruction as an immediate are
3145 a single cycle, but this requires r0, so make it a little more expensive. */
3147 if (CONST_OK_FOR_K08 (i))
3148 return 2 * cost_scale;
3149 /* Constants that can be loaded with a mov immediate need one more cycle.
3150 This case is probably unnecessary. */
3151 if (CONST_OK_FOR_I08 (i))
3152 return 2 * cost_scale;
3153 /* Any other constant requires an additional 2 cycle pc-relative load.
3154 This case is probably unnecessary. */
3155 return 3 * cost_scale;
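/* Cost examples for SImode (added): 'x & 0xff' is a single extu.b, cost 1;
   'x & 0x3f' uses a K08 immediate and needs r0, cost 2; 'x & 0x12345'
   needs a constant pool load first, cost 3.  */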
3158 /* Return the cost of an addition or a subtraction. */
3162 if (GET_MODE (x) == SImode)
3164 /* The addc or subc patterns will eventually become one or two
3165 instructions. Below are some costs for some of the patterns
3166 which combine would reject because the costs of the individual
3167 insns in the patterns are lower.
3169 FIXME: It would be much easier if we had something like insn cost
3170 attributes and the cost calculation machinery used those attributes
3171 in the first place. This would eliminate redundant recog-like C
3172 code to calculate costs of complex patterns. */
3173 rtx op0 = XEXP (x, 0);
3174 rtx op1 = XEXP (x, 1);
3176 if (GET_CODE (x) == PLUS)
3178 if (GET_CODE (op0) == AND
3179 && XEXP (op0, 1) == const1_rtx
3180 && (GET_CODE (op1) == PLUS
3181 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3184 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3185 && GET_CODE (op1) == LSHIFTRT
3186 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3191 /* On SH1-4 we have only max. SImode operations.
3192 Double the cost for modes > SImode. */
3193 const int cost_scale = !TARGET_SHMEDIA
3194 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3197 /* Adding a register is a single cycle insn. */
3198 if (REG_P (XEXP (x, 1))
3199 || GET_CODE (XEXP (x, 1)) == SUBREG)
3200 return 1 * cost_scale;
3202 /* Likewise for small constants. */
3203 if (CONST_INT_P (XEXP (x, 1))
3204 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3205 return 1 * cost_scale;
3208 switch (GET_CODE (XEXP (x, 1)))
3213 return TARGET_SHMEDIA64 ? 5 : 3;
3216 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3218 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3220 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3228 /* Any other constant requires a 2 cycle pc-relative load plus an addition. */
3230 return 3 * cost_scale;
3233 /* Return the cost of a multiply. */
3235 multcosts (rtx x ATTRIBUTE_UNUSED)
3237 if (sh_multcost >= 0)
3240 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3241 accept constants. Ideally, we would use a cost of one or two and
3242 add the cost of the operand, but disregard the latter when inside loops
3243 and loop invariant code motion is still to follow.
3244 Using a multiply first and splitting it later if it's a loss
3245 doesn't work because of different sign / zero extension semantics
3246 of multiplies vs. shifts. */
3247 return optimize_size ? 2 : 3;
3251 /* We have a mul insn, so we can never take more than the mul and the
3252 read of the mac reg, but count more because of the latency and extra reg usage. */
3259 /* If we're aiming at small code, then just count the number of
3260 insns in a multiply call sequence. */
3264 /* Otherwise count all the insns in the routine we'd be calling too. */
3268 /* Compute a (partial) cost for rtx X. Return true if the complete
3269 cost has been computed, and false if subexpressions should be
3270 scanned. In either case, *TOTAL contains the cost result. */
3272 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3273 int *total, bool speed ATTRIBUTE_UNUSED)
3277 /* The lower-subreg pass decides whether to split multi-word regs
3278 into individual regs by looking at the cost for a SET of certain
3279 modes with the following patterns:
3281 (set (reg) (reg)) and (set (reg) (const_int 0))
3282 On machines that support vector-move operations a multi-word move
3283 is the same cost as individual reg move. On SH there is no
3284 vector-move, so we have to provide the correct cost in the number
3285 of move insns to load/store the reg of the mode in question. */
3287 if (register_operand (SET_DEST (x), VOIDmode)
3288 && (register_operand (SET_SRC (x), VOIDmode)
3289 || satisfies_constraint_Z (SET_SRC (x))))
3291 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3292 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3293 / mov_insn_size (mode, TARGET_SH2A));
3298 /* The cost of a mem access is mainly the cost of the address mode. */
3300 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3304 /* The cost of a sign or zero extend depends on whether the source is a
3305 reg or a mem. In case of a mem take the address into account. */
3307 if (REG_P (XEXP (x, 0)))
3309 *total = COSTS_N_INSNS (1);
3312 if (MEM_P (XEXP (x, 0)))
3314 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3315 GET_MODE (XEXP (x, 0)),
3316 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3322 if (REG_P (XEXP (x, 0)))
3324 *total = COSTS_N_INSNS (1);
3327 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3328 && (GET_MODE (XEXP (x, 0)) == QImode
3329 || GET_MODE (XEXP (x, 0)) == HImode))
3331 /* Handle SH2A's movu.b and movu.w insn. */
3332 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3333 GET_MODE (XEXP (x, 0)),
3334 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3339 /* mems for SFmode and DFmode can be inside a parallel due to
3340 the way the fpscr is handled. */
3342 for (int i = 0; i < XVECLEN (x, 0); i++)
3344 rtx xx = XVECEXP (x, 0, i);
3345 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3347 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3348 GET_MODE (XEXP (xx, 0)),
3349 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3352 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3354 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3355 GET_MODE (XEXP (xx, 1)),
3356 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3361 if (sh_1el_vec (x, VOIDmode))
3362 *total = outer_code != SET;
3363 else if (sh_rep_vec (x, VOIDmode))
3364 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3365 + (outer_code != SET));
3367 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3373 if (INTVAL (x) == 0)
3375 else if (outer_code == AND && and_operand ((x), DImode))
3377 else if ((outer_code == IOR || outer_code == XOR
3378 || outer_code == PLUS)
3379 && CONST_OK_FOR_I10 (INTVAL (x)))
3381 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3382 *total = COSTS_N_INSNS (outer_code != SET);
3383 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3384 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3385 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3386 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3388 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3391 if (CONST_OK_FOR_I08 (INTVAL (x)))
3393 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3394 && CONST_OK_FOR_K08 (INTVAL (x)))
3396 /* prepare_cmp_insn will force costly constants into registers before
3397 the cbranch[sd]i4 patterns can see them, so preserve potentially
3398 interesting ones not covered by I08 above. */
3399 else if (outer_code == COMPARE
3400 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3401 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3402 || INTVAL (x) == 0x7fffffff
3403 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3410 /* An and with a constant compared against zero is
3411 most likely going to be a TST #imm, R0 instruction.
3412 Notice that this does not catch the zero_extract variants from the md file. */
3414 if (GET_CODE (XEXP (x, 0)) == AND
3415 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3425 /* This is most likely a clips.b or clips.w insn that is being made up by combine. */
3428 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3429 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3430 && REG_P (XEXP (XEXP (x, 0), 0))
3431 && CONST_INT_P (XEXP (x, 1)))
3433 *total = COSTS_N_INSNS (1);
3442 if (TARGET_SHMEDIA64)
3443 *total = COSTS_N_INSNS (4);
3444 else if (TARGET_SHMEDIA32)
3445 *total = COSTS_N_INSNS (2);
3452 *total = COSTS_N_INSNS (4);
3453 /* prepare_cmp_insn will force costly constants into registers before
3454 the cbranchdi4 pattern can see them, so preserve potentially
3455 interesting ones. */
3456 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3463 /* FIXME: This looks broken. Only the last statement has any effect.
3464 Probably this could be folded with the PARALLEL case? */
3465 if (x == CONST0_RTX (GET_MODE (x)))
3467 else if (sh_1el_vec (x, VOIDmode))
3468 *total = outer_code != SET;
3469 if (sh_rep_vec (x, VOIDmode))
3470 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3471 + (outer_code != SET));
3472 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3477 *total = COSTS_N_INSNS (addsubcosts (x));
3483 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3487 *total = COSTS_N_INSNS (multcosts (x));
3492 /* div0s sign comparison. */
3493 if (GET_CODE (XEXP (x, 0)) == XOR
3494 && REG_P ((XEXP (XEXP (x, 0), 0)))
3495 && REG_P ((XEXP (XEXP (x, 0), 1)))
3496 && satisfies_constraint_Z (XEXP (x, 1)))
3498 *total = COSTS_N_INSNS (1);
3505 /* div0s sign comparison. */
3506 if (GET_CODE (XEXP (x, 0)) == XOR
3507 && REG_P ((XEXP (XEXP (x, 0), 0)))
3508 && REG_P ((XEXP (XEXP (x, 0), 1)))
3509 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3511 *total = COSTS_N_INSNS (1);
3514 /* Fall through to shiftcosts. */
3518 int cost = shiftcosts (x);
3521 *total = COSTS_N_INSNS (cost);
3529 *total = COSTS_N_INSNS (20);
3542 /* Determine the size of the fundamental move insn that will be used
3543 for the specified mode. */
3545 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3547 const int mode_sz = GET_MODE_SIZE (mode);
3549 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3550 || (TARGET_FMOVD && mode == DFmode))
3554 /* The max. available mode for actual move insns is SImode.
3555 Larger accesses will be split into multiple loads/stores. */
3556 const int max_mov_sz = GET_MODE_SIZE (SImode);
3557 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
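/* Examples (added): SImode yields 4; DImode yields 4 as well, meaning the
   move is split into two SImode accesses; DFmode yields 8 only with FMOVD,
   or with the SH2A double FPU when consider_sh2a is true.  */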
3561 /* Determine the maximum possible displacement for a move insn for the specified mode. */
3564 max_mov_insn_displacement (enum machine_mode mode, bool consider_sh2a)
3566 /* The 4 byte displacement move insns are the same as the 2 byte
3567 versions but take a 12 bit displacement. All we need to do is to
3568 scale the max. displacement value accordingly. */
3569 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3571 /* SH2A supports FPU move insns with 12 bit displacements.
3572 Other variants do not support any kind of displacements for FPU move insns. */
3574 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3578 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3579 const int mode_sz = GET_MODE_SIZE (mode);
3580 int r = 15 * mov_insn_sz * disp_scale;
3582 /* If the mov insn will be split into multiple loads/stores, the
3583 maximum possible displacement is a bit smaller. */
3584 if (mode_sz > mov_insn_sz)
3585 r -= mode_sz - mov_insn_sz;
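/* Worked examples (added): SImode, non-SH2A: 15 * 4 * 1 = 60, matching
   mov.l @(disp,Rm),Rn with a 4 bit displacement scaled by 4.  DImode is
   reduced to 60 - 4 = 56 because the access is split into two SImode
   moves.  With the SH2A 12 bit forms the scale is 4095 / 15 = 273,
   giving 15 * 4 * 273 = 16380 for SImode.  */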
3590 /* Determine the alignment mask for a move insn of the specified mode. */
3593 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3595 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3596 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3599 /* Return the displacement value of a displacement address. */
3600 static inline HOST_WIDE_INT
3601 disp_addr_displacement (rtx x)
3603 gcc_assert (satisfies_constraint_Sdd (x));
3604 return INTVAL (XEXP (XEXP (x, 0), 1));
3607 /* Compute the cost of an address. */
3609 sh_address_cost (rtx x, enum machine_mode mode,
3610 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3612 /* 'GBR + 0'. Account one more because of R0 restriction. */
3613 if (REG_P (x) && REGNO (x) == GBR_REG)
3616 /* Simple reg, post-inc, pre-dec addressing. */
3617 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3620 /* 'reg + disp' addressing. */
3621 if (GET_CODE (x) == PLUS
3622 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3624 /* 'GBR + disp'. Account one more because of R0 restriction. */
3625 if (REGNO (XEXP (x, 0)) == GBR_REG
3626 && gbr_displacement (XEXP (x, 1), mode))
3629 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3634 /* The displacement would fit into a 2 byte move insn.
3635 HImode and QImode loads/stores with displacement put pressure on
3636 R0 which will most likely require another reg copy. Thus account
3637 a higher cost for that. */
3638 if (offset > 0 && offset <= max_mov_insn_displacement (mode, false))
3639 return (mode == HImode || mode == QImode) ? 2 : 1;
3641 /* The displacement would fit into a 4 byte move insn (SH2A). */
3643 && offset > 0 && offset <= max_mov_insn_displacement (mode, true))
3646 /* The displacement is probably out of range and will require extra calculations. */
3651 /* 'reg + reg' addressing. Account a slightly higher cost because of
3652 increased pressure on R0. */
3653 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3654 && ! TARGET_SHMEDIA)
3657 /* Not sure what it is - probably expensive. */
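/* Cost examples (added for illustration): '@(4,r1)' in SImode fits a 2 byte
   mov.l and costs 1; '@(2,r1)' in HImode also fits but costs 2 because of
   the R0 pressure noted above; 'r0 + r1' indexed addressing likewise costs
   more than a plain register address.  */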
3661 /* Code to expand a shift. */
3663 gen_ashift (int type, int n, rtx reg)
3667 /* Negative values here come from the shift_amounts array. */
3677 n_rtx = GEN_INT (n);
3678 gcc_assert (satisfies_constraint_P27 (n_rtx));
3683 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3687 emit_insn (gen_shlr (reg, reg));
3689 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3692 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3699 /* Code to expand a HImode shift. */
3701 gen_ashift_hi (int type, int n, rtx reg)
3703 /* Negative values here come from the shift_amounts array. */
3717 /* We don't have HImode right shift operations because using the
3718 ordinary 32 bit shift instructions for that doesn't generate proper
3719 zero/sign extension.
3720 gen_ashift_hi is only called in contexts where we know that the
3721 sign extension works out correctly. */
3724 if (GET_CODE (reg) == SUBREG)
3726 offset = SUBREG_BYTE (reg);
3727 reg = SUBREG_REG (reg);
3729 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3733 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3738 /* Output RTL to split a constant shift into its component SH constant
3739 shift instructions. */
3741 gen_shifty_op (int code, rtx *operands)
3743 int value = INTVAL (operands[2]);
3746 /* Truncate the shift count in case it is out of bounds. */
3751 if (code == LSHIFTRT)
3753 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3754 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3757 else if (code == ASHIFT)
3759 /* There is a two instruction sequence for 31 bit left shifts,
3760 but it requires r0. */
3761 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3763 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3764 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3769 else if (value == 0)
3771 /* This can happen even when optimizing, if there were subregs before
3772 reload. Don't output a nop here, as this is never optimized away;
3773 use a no-op move instead. */
3774 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3778 max = ashl_lshr_seq[value].insn_count;
3779 for (i = 0; i < max; i++)
3780 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3783 /* Same as gen_shifty_op, but optimized for values where the topmost bits don't matter. */
3786 gen_shifty_hi_op (int code, rtx *operands)
3788 int value = INTVAL (operands[2]);
3790 void (*gen_fun) (int, int, rtx);
3792 /* This operation is used by and_shl for SImode values with a few
3793 high bits known to be cleared. */
3797 emit_insn (gen_nop ());
3801 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3804 max = ext_ashl_lshr_seq[value].insn_count;
3805 for (i = 0; i < max; i++)
3806 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3809 /* When shifting right, emit the shifts in reverse order, so that
3810 solitary negative values come first. */
3811 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3812 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3815 /* Output RTL for an arithmetic right shift.
3816 ??? Rewrite to use super-optimizer sequences. */
3818 expand_ashiftrt (rtx *operands)
3824 if (TARGET_DYNSHIFT)
3826 if (!CONST_INT_P (operands[2]))
3828 rtx count = copy_to_mode_reg (SImode, operands[2]);
3829 emit_insn (gen_negsi2 (count, count));
3830 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3833 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3834 > 1 + SH_DYNAMIC_SHIFT_COST)
3837 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3838 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3842 if (!CONST_INT_P (operands[2]))
3845 value = INTVAL (operands[2]) & 31;
3849 /* If we are called from abs expansion, arrange things so that we
3850 can use a single MT instruction that doesn't clobber the source,
3851 if LICM can hoist out the load of the constant zero. */
3852 if (currently_expanding_to_rtl)
3854 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3856 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3859 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3862 else if (value >= 16 && value <= 19)
3864 wrk = gen_reg_rtx (SImode);
3865 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3868 gen_ashift (ASHIFTRT, 1, wrk);
3869 emit_move_insn (operands[0], wrk);
3872 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3873 else if (value <= 5)
3875 wrk = gen_reg_rtx (SImode);
3876 emit_move_insn (wrk, operands[1]);
3878 gen_ashift (ASHIFTRT, 1, wrk);
3879 emit_move_insn (operands[0], wrk);
3883 wrk = gen_reg_rtx (Pmode);
3885 /* Load the value into an arg reg and call a helper. */
3886 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3887 sprintf (func, "__ashiftrt_r4_%d", value);
3888 function_symbol (wrk, func, SFUNC_STATIC);
3889 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3890 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
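/* Example (added): an arithmetic 'x >> 24' takes this path; x is moved
   into r4, the static helper __ashiftrt_r4_24 is called and the shifted
   result is copied back out of r4.  */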
3894 /* Try to find a good way to implement the combiner pattern
3895 [(set (match_operand:SI 0 "register_operand" "r")
3896 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3897 (match_operand:SI 2 "const_int_operand" "n"))
3898 (match_operand:SI 3 "const_int_operand" "n"))) .
3899 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3900 return 0 for simple right / left or left/right shift combination.
3901 return 1 for a combination of shifts with zero_extend.
3902 return 2 for a combination of shifts with an AND that needs r0.
3903 return 3 for a combination of shifts with an AND that needs an extra
3904 scratch register, when the three highmost bits of the AND mask are clear.
3905 return 4 for a combination of shifts with an AND that needs an extra
3906 scratch register, when any of the three highmost bits of the AND mask
3908 If ATTRP is set, store an initial right shift width in ATTRP[0],
3909 and the instruction length in ATTRP[1]. These values are not valid when returning 0.
3911 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3912 shift_amounts for the last shift value that is to be used before the sign extend. */
3915 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3917 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3918 int left = INTVAL (left_rtx), right;
3920 int cost, best_cost = 10000;
3921 int best_right = 0, best_len = 0;
3925 if (left < 0 || left > 31)
3927 if (CONST_INT_P (mask_rtx))
3928 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3930 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3931 /* Can this be expressed as a right shift / left shift pair? */
3932 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3933 right = exact_log2 (lsb);
3934 mask2 = ~(mask + lsb - 1);
3935 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3936 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
3938 best_cost = ashl_lshr_seq[right].insn_count
3939 + ashl_lshr_seq[right + left].insn_count;
3940 /* mask has no trailing zeroes <==> ! right */
3941 else if (! right && mask2 == ~(lsb2 - 1))
3943 int late_right = exact_log2 (lsb2);
3944 best_cost = ashl_lshr_seq[left + late_right].insn_count
3945 + ashl_lshr_seq[late_right].insn_count;
3947 /* Try to use zero extend. */
3948 if (mask2 == ~(lsb2 - 1))
3952 for (width = 8; width <= 16; width += 8)
3954 /* Can we zero-extend right away? */
3955 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3957 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3958 + ext_ashl_lshr_seq[left + right].insn_count;
3959 if (cost < best_cost)
3970 /* ??? Could try to put zero extend into initial right shift,
3971 or even shift a bit left before the right shift. */
3972 /* Determine value of first part of left shift, to get to the
3973 zero extend cut-off point. */
3974 first = width - exact_log2 (lsb2) + right;
3975 if (first >= 0 && right + left - first >= 0)
3977 cost = ext_ashl_lshr_seq[right].insn_count
3978 + ext_ashl_lshr_seq[first].insn_count + 1
3979 + ext_ashl_lshr_seq[right + left - first].insn_count;
3981 if (cost < best_cost)
3993 /* Try to use r0 AND pattern */
3994 for (i = 0; i <= 2; i++)
3998 if (! CONST_OK_FOR_K08 (mask >> i))
4000 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4001 if (cost < best_cost)
4006 best_len = cost - 1;
4009 /* Try to use a scratch register to hold the AND operand. */
4010 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4011 for (i = 0; i <= 2; i++)
4015 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4018 : ashl_lshr_seq)[left + i].insn_count;
4019 if (cost < best_cost)
4024 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4030 attrp[0] = best_right;
4031 attrp[1] = best_len;
4036 /* This is used in length attributes of the unnamed instructions
4037 corresponding to shl_and_kind return values of 1 and 2. */
4039 shl_and_length (rtx insn)
4041 rtx set_src, left_rtx, mask_rtx;
4044 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4045 left_rtx = XEXP (XEXP (set_src, 0), 1);
4046 mask_rtx = XEXP (set_src, 1);
4047 shl_and_kind (left_rtx, mask_rtx, attributes);
4048 return attributes[1];
4051 /* This is used in length attribute of the and_shl_scratch instruction. */
4053 shl_and_scr_length (rtx insn)
4055 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4056 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4057 rtx op = XEXP (set_src, 0);
4058 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4059 op = XEXP (XEXP (op, 0), 0);
4060 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4063 /* Generate rtl for instructions for which shl_and_kind advised a particular
4064 method of generating them, i.e. returned zero. */
4066 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4069 unsigned HOST_WIDE_INT mask;
4070 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4071 int right, total_shift;
4072 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4074 right = attributes[0];
4075 total_shift = INTVAL (left_rtx) + right;
4076 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4083 int first = attributes[2];
4088 emit_insn ((mask << right) <= 0xff
4089 ? gen_zero_extendqisi2 (dest,
4090 gen_lowpart (QImode, source))
4091 : gen_zero_extendhisi2 (dest,
4092 gen_lowpart (HImode, source)));
4096 emit_insn (gen_movsi (dest, source));
4100 operands[2] = GEN_INT (right);
4101 gen_shifty_hi_op (LSHIFTRT, operands);
4105 operands[2] = GEN_INT (first);
4106 gen_shifty_hi_op (ASHIFT, operands);
4107 total_shift -= first;
4111 emit_insn (mask <= 0xff
4112 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4113 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4114 if (total_shift > 0)
4116 operands[2] = GEN_INT (total_shift);
4117 gen_shifty_hi_op (ASHIFT, operands);
4122 shift_gen_fun = gen_shifty_op;
4124 /* If the topmost bit that matters is set, set the topmost bits
4125 that don't matter. This way, we might be able to get a shorter signed constant. */
4127 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4128 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4130 /* Don't expand fine-grained when combining, because that will
4131 make the pattern fail. */
4132 if (currently_expanding_to_rtl
4133 || reload_in_progress || reload_completed)
4137 /* Cases 3 and 4 should be handled by this split
4138 only while combining */
4139 gcc_assert (kind <= 2);
4142 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4145 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4150 operands[2] = GEN_INT (total_shift);
4151 shift_gen_fun (ASHIFT, operands);
4158 if (kind != 4 && total_shift < 16)
4160 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4162 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4166 emit_insn (gen_and_shl_scratch (dest, source,
4169 GEN_INT (total_shift + neg),
4171 emit_insn (gen_movsi (dest, dest));
4178 /* Try to find a good way to implement the combiner pattern
4179 [(set (match_operand:SI 0 "register_operand" "=r")
4180 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4181 (match_operand:SI 2 "const_int_operand" "n")
4182 (match_operand:SI 3 "const_int_operand" "n")
4184 (clobber (reg:SI T_REG))]
4185 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4186 return 0 for simple left / right shift combination.
4187 return 1 for left shift / 8 bit sign extend / left shift.
4188 return 2 for left shift / 16 bit sign extend / left shift.
4189 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4190 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4191 return 5 for left shift / 16 bit sign extend / right shift
4192 return 6 for < 8 bit sign extend / left shift.
4193 return 7 for < 8 bit sign extend / left shift / single right shift.
4194 If COSTP is nonzero, assign the calculated cost to *COSTP. */
static int
shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
{
  int left, size, insize, ext;
  int cost = 0, best_cost;
  int kind = 0;
4202 left = INTVAL (left_rtx);
4203 size = INTVAL (size_rtx);
4204 insize = size - left;
4205 gcc_assert (insize > 0);
4206 /* Default to left / right shift. */
4208 best_cost = ashl_lshr_seq[32 - insize].insn_count
4209 + ashl_lshr_seq[32 - size].insn_count;
4212 /* 16 bit shift / sign extend / 16 bit shift */
4213 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4214 + ashl_lshr_seq[16 - size].insn_count;
4215 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4216 below, by alternative 3 or something even better. */
4217 if (cost < best_cost)
4223 /* Try a plain sign extend between two shifts. */
4224 for (ext = 16; ext >= insize; ext -= 8)
4228 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4229 + ashl_lshr_seq[size - ext].insn_count;
4230 if (cost < best_cost)
4232 kind = ext / (unsigned) 8;
4236 /* Check if we can do a sloppy shift with a final signed shift
4237 restoring the sign. */
4238 if (EXT_SHIFT_SIGNED (size - ext))
4239 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4240 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4241 /* If not, maybe it's still cheaper to do the second shift sloppy,
4242 and do a final sign extend? */
4243 else if (size <= 16)
4244 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4245 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4249 if (cost < best_cost)
4251 kind = ext / (unsigned) 8 + 2;
  /* Check if we can sign extend in r0.  */
4258 cost = 3 + ashl_lshr_seq[left].insn_count;
4259 if (cost < best_cost)
4264 /* Try the same with a final signed shift. */
4267 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4268 if (cost < best_cost)
4275 if (TARGET_DYNSHIFT)
4277 /* Try to use a dynamic shift. */
4278 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4279 if (cost < best_cost)
4290 /* Function to be used in the length attribute of the instructions
4291 implementing this pattern. */
int
shl_sext_length (rtx insn)
{
  rtx set_src, left_rtx, size_rtx;
  int cost;
4298 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4299 left_rtx = XEXP (XEXP (set_src, 0), 1);
4300 size_rtx = XEXP (set_src, 1);
4301 shl_sext_kind (left_rtx, size_rtx, &cost);
/* Generate rtl for this pattern.  */
bool
gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
{
  int kind;
  int left, size, insize, cost;
  rtx operands[3];
4313 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4314 left = INTVAL (left_rtx);
4315 size = INTVAL (size_rtx);
4316 insize = size - left;
4324 int ext = kind & 1 ? 8 : 16;
4325 int shift2 = size - ext;
4327 /* Don't expand fine-grained when combining, because that will
4328 make the pattern fail. */
4329 if (! currently_expanding_to_rtl
4330 && ! reload_in_progress && ! reload_completed)
4332 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4333 emit_insn (gen_movsi (dest, source));
4337 emit_insn (gen_movsi (dest, source));
4341 operands[2] = GEN_INT (ext - insize);
4342 gen_shifty_hi_op (ASHIFT, operands);
4345 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4346 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4351 operands[2] = GEN_INT (shift2);
4352 gen_shifty_op (ASHIFT, operands);
4359 if (EXT_SHIFT_SIGNED (shift2))
4361 operands[2] = GEN_INT (shift2 + 1);
4362 gen_shifty_op (ASHIFT, operands);
4363 operands[2] = const1_rtx;
4364 gen_shifty_op (ASHIFTRT, operands);
4367 operands[2] = GEN_INT (shift2);
4368 gen_shifty_hi_op (ASHIFT, operands);
4372 operands[2] = GEN_INT (-shift2);
4373 gen_shifty_hi_op (LSHIFTRT, operands);
4375 emit_insn (size <= 8
4376 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4377 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4384 if (! currently_expanding_to_rtl
4385 && ! reload_in_progress && ! reload_completed)
4386 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4390 operands[2] = GEN_INT (16 - insize);
4391 gen_shifty_hi_op (ASHIFT, operands);
4392 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4394 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4396 gen_ashift (ASHIFTRT, 1, dest);
4401 /* Don't expand fine-grained when combining, because that will
4402 make the pattern fail. */
4403 if (! currently_expanding_to_rtl
4404 && ! reload_in_progress && ! reload_completed)
4406 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4407 emit_insn (gen_movsi (dest, source));
4410 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4411 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4412 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4414 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4415 gen_shifty_op (ASHIFT, operands);
4417 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4425 /* Prefix a symbol_ref name with "datalabel". */
rtx
gen_datalabel_ref (rtx sym)
{
  const char *str;
4431 if (GET_CODE (sym) == LABEL_REF)
4432 return gen_rtx_CONST (GET_MODE (sym),
4433 gen_rtx_UNSPEC (GET_MODE (sym),
4437 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4439 str = XSTR (sym, 0);
  /* Share all SYMBOL_REF strings with the same value - that is important
     for cse.  */
4442 str = IDENTIFIER_POINTER (get_identifier (str));
4443 XSTR (sym, 0) = str;
4449 static alloc_pool label_ref_list_pool;
4451 typedef struct label_ref_list_d
{
  rtx label;
  struct label_ref_list_d *next;
4455 } *label_ref_list_t;
4457 /* The SH cannot load a large constant into a register, constants have to
4458 come from a pc relative load. The reference of a pc relative load
4459 instruction must be less than 1k in front of the instruction. This
4460 means that we often have to dump a constant inside a function, and
4461 generate code to branch around it.
4463 It is important to minimize this, since the branches will slow things
4464 down and make things bigger.
   Worst case code looks like:

   mov.l L1,rn
   bra   L2
   nop
   align
   L1:   .long value
   L2:
   ..

   mov.l L3,rn
   bra   L4
   nop
   align
   L3:   .long value
   L4:
   ..
4484 We fix this by performing a scan before scheduling, which notices which
4485 instructions need to have their operands fetched from the constant table
4486 and builds the table.
   The algorithm is:

   scan, find an instruction which needs a pcrel move.  Look forward, find the
4491 last barrier which is within MAX_COUNT bytes of the requirement.
4492 If there isn't one, make one. Process all the instructions between
4493 the find and the barrier.
4495 In the above example, we can tell that L3 is within 1k of L1, so
4496 the first move can be shrunk from the 3 insn+constant sequence into
   just 1 insn, and the constant moved to L3 to make:

   mov.l L1,rn
   ..
   mov.l L3,rn
   bra   L4
   nop
   align
   L3:.long value
   L4:.long value

   Then the second move becomes the target for the shortening process.  */
typedef struct
{
  rtx value;			/* Value in table.  */
4513 rtx label; /* Label of value. */
4514 label_ref_list_t wend; /* End of window. */
4515 enum machine_mode mode; /* Mode of value. */
4517 /* True if this constant is accessed as part of a post-increment
4518 sequence. Note that HImode constants are never accessed in this way. */
  bool part_of_sequence_p;
} pool_node;
4522 /* The maximum number of constants that can fit into one pool, since
4523 constants in the range 0..510 are at least 2 bytes long, and in the
4524 range from there to 1018 at least 4 bytes. */
4526 #define MAX_POOL_SIZE 372
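/* Rough capacity check (our arithmetic, not from the original sources):
   510 / 2 + (1018 - 510) / 4 = 255 + 127 = 382 entries would fit in the
   best case; 372 leaves slack for alignment padding and for mixed
   2, 4 and 8 byte entries.  */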
4527 static pool_node pool_vector[MAX_POOL_SIZE];
4528 static int pool_size;
4529 static rtx pool_window_label;
4530 static int pool_window_last;
4532 static int max_labelno_before_reorg;
4534 /* ??? If we need a constant in HImode which is the truncated value of a
4535 constant we need in SImode, we could combine the two entries thus saving
   two bytes.  Is this common enough to be worth the effort of implementing
   this?  */
4539 /* ??? This stuff should be done at the same time that we shorten branches.
4540 As it is now, we must assume that all branches are the maximum size, and
   this causes us to almost always output constant pools sooner than
   necessary.  */
4544 /* Add a constant to the pool and return its label. */
static rtx
add_constant (rtx x, enum machine_mode mode, rtx last_value)
{
  int i;
  rtx lab, new_rtx;
  label_ref_list_t ref, newref;
4552 /* First see if we've already got it. */
4553 for (i = 0; i < pool_size; i++)
      if (GET_CODE (x) == GET_CODE (pool_vector[i].value)
	  && mode == pool_vector[i].mode)
	{
	  if (GET_CODE (x) == CODE_LABEL)
4560 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4563 if (rtx_equal_p (x, pool_vector[i].value))
4568 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4570 new_rtx = gen_label_rtx ();
4571 LABEL_REFS (new_rtx) = pool_vector[i].label;
4572 pool_vector[i].label = lab = new_rtx;
4574 if (lab && pool_window_label)
4576 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4577 newref->label = pool_window_label;
4578 ref = pool_vector[pool_window_last].wend;
4580 pool_vector[pool_window_last].wend = newref;
4583 pool_window_label = new_rtx;
4584 pool_window_last = i;
4590 /* Need a new one. */
4591 pool_vector[pool_size].value = x;
4592 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4595 pool_vector[pool_size - 1].part_of_sequence_p = true;
4598 lab = gen_label_rtx ();
4599 pool_vector[pool_size].mode = mode;
4600 pool_vector[pool_size].label = lab;
4601 pool_vector[pool_size].wend = NULL;
4602 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4603 if (lab && pool_window_label)
4605 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4606 newref->label = pool_window_label;
4607 ref = pool_vector[pool_window_last].wend;
4609 pool_vector[pool_window_last].wend = newref;
4612 pool_window_label = lab;
4613 pool_window_last = pool_size;
4618 /* Output the literal table. START, if nonzero, is the first instruction
4619 this table is needed for, and also indicates that there is at least one
   casesi_worker_2 instruction; we have to emit the operand3 labels from
4621 these insns at a 4-byte aligned position. BARRIER is the barrier
4622 after which we are to place the table. */
static void
dump_table (rtx start, rtx barrier)
{
  rtx scan = barrier;
  int i;
  bool need_align = true;
  rtx lab;
  label_ref_list_t ref;
4631 bool have_df = false;
4633 /* Do two passes, first time dump out the HI sized constants. */
4635 for (i = 0; i < pool_size; i++)
4637 pool_node *p = &pool_vector[i];
4639 if (p->mode == HImode)
4643 scan = emit_insn_after (gen_align_2 (), scan);
4646 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4647 scan = emit_label_after (lab, scan);
4648 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4650 for (ref = p->wend; ref; ref = ref->next)
4653 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4656 else if (p->mode == DFmode)
4664 scan = emit_insn_after (gen_align_4 (), scan);
4666 for (; start != barrier; start = NEXT_INSN (start))
4667 if (NONJUMP_INSN_P (start)
4668 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4670 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4671 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4673 scan = emit_label_after (lab, scan);
4676 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4678 rtx align_insn = NULL_RTX;
4680 scan = emit_label_after (gen_label_rtx (), scan);
4681 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4684 for (i = 0; i < pool_size; i++)
4686 pool_node *p = &pool_vector[i];
4694 if (align_insn && !p->part_of_sequence_p)
4696 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4697 emit_label_before (lab, align_insn);
4698 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4700 for (ref = p->wend; ref; ref = ref->next)
4703 emit_insn_before (gen_consttable_window_end (lab),
4706 delete_insn (align_insn);
4707 align_insn = NULL_RTX;
4712 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4713 scan = emit_label_after (lab, scan);
4714 scan = emit_insn_after (gen_consttable_4 (p->value,
4716 need_align = ! need_align;
4722 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4727 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4728 scan = emit_label_after (lab, scan);
4729 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4736 if (p->mode != HImode)
4738 for (ref = p->wend; ref; ref = ref->next)
4741 scan = emit_insn_after (gen_consttable_window_end (lab),
4750 for (i = 0; i < pool_size; i++)
4752 pool_node *p = &pool_vector[i];
4763 scan = emit_label_after (gen_label_rtx (), scan);
4764 scan = emit_insn_after (gen_align_4 (), scan);
4766 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4767 scan = emit_label_after (lab, scan);
4768 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4776 scan = emit_label_after (gen_label_rtx (), scan);
4777 scan = emit_insn_after (gen_align_4 (), scan);
4779 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4780 scan = emit_label_after (lab, scan);
4781 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4788 if (p->mode != HImode)
4790 for (ref = p->wend; ref; ref = ref->next)
4793 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4798 scan = emit_insn_after (gen_consttable_end (), scan);
4799 scan = emit_barrier_after (scan);
4801 pool_window_label = NULL_RTX;
4802 pool_window_last = 0;
4805 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4807 /* Nonzero if the insn is a move instruction which needs to be fixed. */
/* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4810 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4811 need to fix it if the input value is CONST_OK_FOR_I08. */
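/* Illustrative example (ours): (set (reg:SI r1) (const_int 0x12345))
   cannot be emitted as "mov #imm,Rn", whose immediate is sign-extended
   8-bit only (-128..127), so it must be turned into a pc-relative
   "mov.l .Lnn,r1" with 0x12345 placed in a constant table.  */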
static bool
broken_move (rtx insn)
{
4815 if (NONJUMP_INSN_P (insn))
4817 rtx pat = PATTERN (insn);
4818 if (GET_CODE (pat) == PARALLEL)
4819 pat = XVECEXP (pat, 0, 0);
4820 if (GET_CODE (pat) == SET
4821 /* We can load any 8-bit value if we don't care what the high
4822 order bits end up as. */
4823 && GET_MODE (SET_DEST (pat)) != QImode
4824 && (CONSTANT_P (SET_SRC (pat))
4825 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4826 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4827 /* Match mova_const. */
4828 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4829 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4830 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4832 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4833 && (fp_zero_operand (SET_SRC (pat))
4834 || fp_one_operand (SET_SRC (pat)))
	     /* In general we don't know the current setting of fpscr, so
		disable fldi.
		There is an exception if this was a register-register move
4838 before reload - and hence it was ascertained that we have
4839 single precision setting - and in a post-reload optimization
4840 we changed this to do a constant load. In that case
4841 we don't have an r0 clobber, hence we must use fldi. */
4843 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4845 && REG_P (SET_DEST (pat))
4846 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4848 && GET_MODE (SET_DEST (pat)) == SImode
4849 && (satisfies_constraint_I20 (SET_SRC (pat))
4850 || satisfies_constraint_I28 (SET_SRC (pat))))
4851 && ! satisfies_constraint_I08 (SET_SRC (pat)))
/* Return true if the specified insn is a mova insn.  */
static bool
mova_p (rtx insn)
{
  return (NONJUMP_INSN_P (insn)
4863 && GET_CODE (PATTERN (insn)) == SET
4864 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4865 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4866 /* Don't match mova_const. */
4867 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4870 /* Fix up a mova from a switch that went out of range. */
static void
fixup_mova (rtx mova)
{
4874 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4877 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4878 INSN_CODE (mova) = -1;
4883 rtx lab = gen_label_rtx ();
4884 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4888 worker = NEXT_INSN (worker);
4890 && !LABEL_P (worker)
4891 && !JUMP_P (worker));
4892 } while (NOTE_P (worker)
4893 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4894 wpat = PATTERN (worker);
4895 wpat0 = XVECEXP (wpat, 0, 0);
4896 wpat1 = XVECEXP (wpat, 0, 1);
4897 wsrc = SET_SRC (wpat0);
4898 PATTERN (worker) = (gen_casesi_worker_2
4899 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4900 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4902 INSN_CODE (worker) = -1;
4903 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4904 base = gen_rtx_LABEL_REF (Pmode, lab);
4905 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4906 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4907 INSN_CODE (mova) = -1;
4911 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4912 *num_mova, and check if the new mova is not nested within the first one.
   Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
   2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
static int
untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
{
4918 int n_addr = 0; /* Initialization to shut up spurious warning. */
4919 int f_target, n_target = 0; /* Likewise. */
4923 /* If NEW_MOVA has no address yet, it will be handled later. */
4924 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4927 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4928 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4929 if (n_addr > n_target || n_addr + 1022 < n_target)
4931 /* Change the mova into a load.
4932 broken_move will then return true for it. */
4933 fixup_mova (new_mova);
4939 *first_mova = new_mova;
4944 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4949 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4950 > n_target - n_addr)
4952 fixup_mova (*first_mova);
4957 fixup_mova (new_mova);
4962 /* Find the last barrier from insn FROM which is close enough to hold the
   constant pool.  If we can't find one, then create one near the end of
   the range.  */
static rtx
find_barrier (int num_mova, rtx mova, rtx from)
{
4975 int leading_mova = num_mova;
4976 rtx barrier_before_mova = NULL_RTX;
4977 rtx found_barrier = NULL_RTX;
4978 rtx good_barrier = NULL_RTX;
4982 rtx last_got = NULL_RTX;
4983 rtx last_symoff = NULL_RTX;
4985 /* For HImode: range is 510, add 4 because pc counts from address of
4986 second instruction after this one, subtract 2 for the jump instruction
4987 that we may need to emit before the table, subtract 2 for the instruction
4988 that fills the jump delay slot (in very rare cases, reorg will take an
4989 instruction from after the constant pool or will leave the delay slot
4990 empty). This gives 510.
4991 For SImode: range is 1020, add 4 because pc counts from address of
4992 second instruction after this one, subtract 2 in case pc is 2 byte
4993 aligned, subtract 2 for the jump instruction that we may need to emit
4994 before the table, subtract 2 for the instruction that fills the jump
4995 delay slot. This gives 1018. */
4997 /* The branch will always be shortened now that the reference address for
4998 forward branches is the successor address, thus we need no longer make
4999 adjustments to the [sh]i_limit for -O0. */
5004 while (from && count_si < si_limit && count_hi < hi_limit)
5006 int inc = get_attr_length (from);
5009 /* If this is a label that existed at the time of the compute_alignments
5010 call, determine the alignment. N.B. When find_barrier recurses for
5011 an out-of-reach mova, we might see labels at the start of previously
5012 inserted constant tables. */
5014 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5017 new_align = 1 << label_to_alignment (from);
5018 else if (BARRIER_P (prev_nonnote_insn (from)))
5019 new_align = 1 << barrier_align (from);
5024 /* In case we are scanning a constant table because of recursion, check
5025 for explicit alignments. If the table is long, we might be forced
5026 to emit the new table in front of it; the length of the alignment
5027 might be the last straw. */
5028 else if (NONJUMP_INSN_P (from)
5029 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5030 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5031 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5032 /* When we find the end of a constant table, paste the new constant
5033 at the end. That is better than putting it in front because
5034 this way, we don't need extra alignment for adding a 4-byte-aligned
5035 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5036 else if (NONJUMP_INSN_P (from)
5037 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5038 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5041 if (BARRIER_P (from))
5045 found_barrier = from;
5047 /* If we are at the end of the function, or in front of an alignment
5048 instruction, we need not insert an extra alignment. We prefer
5049 this kind of barrier. */
5050 if (barrier_align (from) > 2)
5051 good_barrier = from;
	  /* If we are at the end of a hot/cold block, dump the constants
	     here.  */
5055 next = NEXT_INSN (from);
5058 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5062 if (broken_move (from))
5065 enum machine_mode mode;
5067 pat = PATTERN (from);
5068 if (GET_CODE (pat) == PARALLEL)
5069 pat = XVECEXP (pat, 0, 0);
5070 src = SET_SRC (pat);
5071 dst = SET_DEST (pat);
5072 mode = GET_MODE (dst);
	  /* A GOT pc-relative setting comes in a pair of
	     mova   .L8,r0
	     mov.l  .L8,r12
	     instructions (plus an add r0,r12).
	     Remember if we see one without the other.  */
5079 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5080 last_got = last_got ? NULL_RTX : from;
5081 else if (PIC_ADDR_P (src))
5082 last_got = last_got ? NULL_RTX : from;
5084 /* We must explicitly check the mode, because sometimes the
5085 front end will generate code to load unsigned constants into
5086 HImode targets without properly sign extending them. */
5088 || (mode == SImode && satisfies_constraint_I16 (src)
5089 && REGNO (dst) != FPUL_REG))
5092 /* We put the short constants before the long constants, so
5093 we must count the length of short constants in the range
5094 for the long constants. */
5095 /* ??? This isn't optimal, but is easy to do. */
5100 /* We dump DF/DI constants before SF/SI ones, because
5101 the limit is the same, but the alignment requirements
5102 are higher. We may waste up to 4 additional bytes
5103 for alignment, and the DF/DI constant may have
5104 another SF/SI constant placed before it. */
5105 if (TARGET_SHCOMPACT
5107 && (mode == DFmode || mode == DImode))
5112 while (si_align > 2 && found_si + si_align - 2 > count_si)
5114 if (found_si > count_si)
5115 count_si = found_si;
5116 found_si += GET_MODE_SIZE (mode);
5118 si_limit -= GET_MODE_SIZE (mode);
5124 switch (untangle_mova (&num_mova, &mova, from))
5129 rtx src = SET_SRC (PATTERN (from));
5130 if (GET_CODE (src) == CONST
5131 && GET_CODE (XEXP (src, 0)) == UNSPEC
5132 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5136 case 0: return find_barrier (0, 0, mova);
5141 = good_barrier ? good_barrier : found_barrier;
5145 if (found_si > count_si)
5146 count_si = found_si;
5148 else if (JUMP_TABLE_DATA_P (from)
5149 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5151 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5153 && (prev_nonnote_insn (from)
5154 == XEXP (MOVA_LABELREF (mova), 0))))
5156 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5158 /* We have just passed the barrier in front of the
5159 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5160 the ADDR_DIFF_VEC is accessed as data, just like our pool
5161 constants, this is a good opportunity to accommodate what
5162 we have gathered so far.
5163 If we waited any longer, we could end up at a barrier in
5164 front of code, which gives worse cache usage for separated
5165 instruction / data caches. */
5166 good_barrier = found_barrier;
5171 rtx body = PATTERN (from);
5172 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5175 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5176 else if (JUMP_P (from)
5181 /* There is a possibility that a bf is transformed into a bf/s by the
5182 delay slot scheduler. */
5184 && get_attr_type (from) == TYPE_CBRANCH
5185 && ! sequence_insn_p (from))
5191 if (new_align > si_align)
5193 si_limit -= (count_si - 1) & (new_align - si_align);
5194 si_align = new_align;
5196 count_si = (count_si + new_align - 1) & -new_align;
5201 if (new_align > hi_align)
5203 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5204 hi_align = new_align;
5206 count_hi = (count_hi + new_align - 1) & -new_align;
5208 from = NEXT_INSN (from);
5215 /* Try as we might, the leading mova is out of range. Change
5216 it into a load (which will become a pcload) and retry. */
5218 return find_barrier (0, 0, mova);
5222 /* Insert the constant pool table before the mova instruction,
5223 to prevent the mova label reference from going out of range. */
5225 good_barrier = found_barrier = barrier_before_mova;
5231 if (good_barrier && next_real_insn (found_barrier))
5232 found_barrier = good_barrier;
5236 /* We didn't find a barrier in time to dump our stuff,
5237 so we'll make one. */
5238 rtx label = gen_label_rtx ();
5240 /* Don't emit a constant table in the middle of insns for
5241 casesi_worker_2. This is a bit overkill but is enough
	 because casesi_worker_2 does not appear very frequently.  */
5246 /* If we exceeded the range, then we must back up over the last
5247 instruction we looked at. Otherwise, we just need to undo the
5248 NEXT_INSN at the end of the loop. */
5249 if (PREV_INSN (from) != orig
5250 && (count_hi > hi_limit || count_si > si_limit))
5251 from = PREV_INSN (PREV_INSN (from));
5253 from = PREV_INSN (from);
  /* Don't emit a constant table in the middle of global pointer setting,
     since that would move the addressing base GOT into another table.
     We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
     in the pool anyway, so just move up the whole constant pool.

     However, avoid doing so when the last single GOT mov is the starting
     insn itself.  Going back past the start insn would create a negative
     offset, causing errors.  */
5263 if (last_got && last_got != orig)
5264 from = PREV_INSN (last_got);
5266 /* Don't insert the constant pool table at the position which
5267 may be the landing pad. */
5270 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5271 from = PREV_INSN (from);
  /* Walk back to be just before any jump or label.
     Putting it before a label reduces the number of times the branch
     around the constant pool table will be hit.  Putting it before
     a jump makes it more likely that the bra delay slot will be
     filled.  */
5278 while (NOTE_P (from) || JUMP_P (from)
5280 from = PREV_INSN (from);
5282 /* Make sure we do not split between a call and its corresponding
5283 CALL_ARG_LOCATION note. */
5286 rtx next = NEXT_INSN (from);
5287 if (next && NOTE_P (next)
5288 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5292 from = emit_jump_insn_after (gen_jump (label), from);
5293 JUMP_LABEL (from) = label;
5294 LABEL_NUSES (label) = 1;
5295 found_barrier = emit_barrier_after (from);
5296 emit_label_after (label, found_barrier);
5299 return found_barrier;
5302 /* If the instruction INSN is implemented by a special function, and we can
5303 positively find the register that is used to call the sfunc, and this
5304 register is not used anywhere else in this instruction - except as the
   destination of a set, return this register; else, return 0.  */
static rtx
sfunc_uses_reg (rtx insn)
{
  int i;
  rtx pattern, part, reg_part, reg;
5312 if (!NONJUMP_INSN_P (insn))
5314 pattern = PATTERN (insn);
5315 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5318 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5320 part = XVECEXP (pattern, 0, i);
5321 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5326 reg = XEXP (reg_part, 0);
5327 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5329 part = XVECEXP (pattern, 0, i);
5330 if (part == reg_part || GET_CODE (part) == CLOBBER)
5332 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5333 && REG_P (SET_DEST (part)))
5334 ? SET_SRC (part) : part)))
5340 /* See if the only way in which INSN uses REG is by calling it, or by
   setting it while calling it.  Set *SET to a SET rtx if the register
   is set by INSN.  */
static bool
noncall_uses_reg (rtx reg, rtx insn, rtx *set)
{
  rtx pattern, reg2;

  *set = NULL_RTX;
5350 reg2 = sfunc_uses_reg (insn);
5351 if (reg2 && REGNO (reg2) == REGNO (reg))
5353 pattern = single_set (insn);
5355 && REG_P (SET_DEST (pattern))
5356 && REGNO (reg) == REGNO (SET_DEST (pattern)))
      /* We don't use rtx_equal_p because we don't care if the mode is
	 different.  */
5364 pattern = single_set (insn);
5366 && REG_P (SET_DEST (pattern))
5367 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5373 par = PATTERN (insn);
5374 if (GET_CODE (par) == PARALLEL)
5375 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5377 part = XVECEXP (par, 0, i);
5378 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5381 return reg_mentioned_p (reg, SET_SRC (pattern));
5387 pattern = PATTERN (insn);
5389 if (GET_CODE (pattern) == PARALLEL)
5393 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5394 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5396 pattern = XVECEXP (pattern, 0, 0);
5399 if (GET_CODE (pattern) == SET)
5401 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5403 /* We don't use rtx_equal_p, because we don't care if the
5404 mode is different. */
5405 if (!REG_P (SET_DEST (pattern))
5406 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5412 pattern = SET_SRC (pattern);
5415 if (GET_CODE (pattern) != CALL
5416 || !MEM_P (XEXP (pattern, 0))
5417 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
/* Given X, a pattern of an insn or a part of it, return a mask of used
5424 general registers. Bits 0..15 mean that the respective registers
5425 are used as inputs in the instruction. Bits 16..31 mean that the
5426 registers 0..15, respectively, are used as outputs, or are clobbered.
5427 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
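/* Illustrative example (ours): for
   (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   the result is (1 << 17) | (1 << 3) | (1 << 2): r1 is written
   (bit 1 + 16), while r2 and r3 are read (bits 2 and 3).  */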
static int
regs_used (rtx x, int is_dest)
{
  enum rtx_code code;
  const char *fmt;
  int i, used = 0;

  if (! x)
    return used;
5437 code = GET_CODE (x);
5442 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5443 << (REGNO (x) + is_dest));
5447 rtx y = SUBREG_REG (x);
5452 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5454 subreg_regno_offset (REGNO (y),
5457 GET_MODE (x)) + is_dest));
5461 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5463 /* If there was a return value, it must have been indicated with USE. */
5478 fmt = GET_RTX_FORMAT (code);
5480 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5485 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5486 used |= regs_used (XVECEXP (x, i, j), is_dest);
5488 else if (fmt[i] == 'e')
5489 used |= regs_used (XEXP (x, i), is_dest);
5494 /* Create an instruction that prevents redirection of a conditional branch
5495 to the destination of the JUMP with address ADDR.
5496 If the branch needs to be implemented as an indirect jump, try to find
5497 a scratch register for it.
5498 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5499 If any preceding insn that doesn't fit into a delay slot is good enough,
5500 pass 1. Pass 2 if a definite blocking insn is needed.
5501 -1 is used internally to avoid deep recursion.
5502 If a blocking instruction is made or recognized, return it. */
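/* Illustrative sketch (ours, not lifted from the original sources): when
   an out-of-range conditional branch needs a scratch register rN, the far
   branch can eventually be emitted as an indirect jump along the lines of

       mov.l  .Lc,rN
       jmp    @rN
       nop
       .align 2
   .Lc: .long  target

   The indirect_jump_scratch insn emitted below merely reserves rN so that
   such an expansion remains possible.  */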
static rtx
gen_block_redirect (rtx jump, int addr, int need_block)
{
  int dead = 0;
5507 rtx prev = prev_nonnote_insn (jump);
5510 /* First, check if we already have an instruction that satisfies our need. */
5511 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5513 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5515 if (GET_CODE (PATTERN (prev)) == USE
5516 || GET_CODE (PATTERN (prev)) == CLOBBER
5517 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5519 else if ((need_block &= ~1) < 0)
5521 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5524 if (GET_CODE (PATTERN (jump)) == RETURN)
5528 /* Reorg even does nasty things with return insns that cause branches
5529 to go out of range - see find_end_label and callers. */
5530 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5532 /* We can't use JUMP_LABEL here because it might be undefined
5533 when not optimizing. */
5534 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5535 /* If the branch is out of range, try to find a scratch register for it. */
5537 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5541 /* Don't look for the stack pointer as a scratch register,
5542 it would cause trouble if an interrupt occurred. */
5543 unsigned attempt = 0x7fff, used;
5544 int jump_left = flag_expensive_optimizations + 1;
5546 /* It is likely that the most recent eligible instruction is wanted for
5547 the delay slot. Therefore, find out which registers it uses, and
5548 try to avoid using them. */
5550 for (scan = jump; (scan = PREV_INSN (scan)); )
5554 if (INSN_DELETED_P (scan))
5556 code = GET_CODE (scan);
5557 if (code == CODE_LABEL || code == JUMP_INSN)
5560 && GET_CODE (PATTERN (scan)) != USE
5561 && GET_CODE (PATTERN (scan)) != CLOBBER
5562 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5564 attempt &= ~regs_used (PATTERN (scan), 0);
5568 for (used = dead = 0, scan = JUMP_LABEL (jump);
5569 (scan = NEXT_INSN (scan)); )
5573 if (INSN_DELETED_P (scan))
5575 code = GET_CODE (scan);
5578 used |= regs_used (PATTERN (scan), 0);
5579 if (code == CALL_INSN)
5580 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5581 dead |= (used >> 16) & ~used;
5587 if (code == JUMP_INSN)
5589 if (jump_left-- && simplejump_p (scan))
5590 scan = JUMP_LABEL (scan);
5596 /* Mask out the stack pointer again, in case it was
5597 the only 'free' register we have found. */
5600 /* If the immediate destination is still in range, check for possible
5601 threading with a jump beyond the delay slot insn.
5602 Don't check if we are called recursively; the jump has been or will be
     checked in a different invocation in that case.  */
5605 else if (optimize && need_block >= 0)
5607 rtx next = next_active_insn (next_active_insn (dest));
5608 if (next && JUMP_P (next)
5609 && GET_CODE (PATTERN (next)) == SET
5610 && recog_memoized (next) == CODE_FOR_jump_compact)
5612 dest = JUMP_LABEL (next);
5614 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5616 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5622 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5624 /* It would be nice if we could convert the jump into an indirect
5625 jump / far branch right now, and thus exposing all constituent
5626 instructions to further optimization. However, reorg uses
5627 simplejump_p to determine if there is an unconditional jump where
5628 it should try to schedule instructions from the target of the
5629 branch; simplejump_p fails for indirect jumps even if they have
5631 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5632 (reg, GEN_INT (unspec_bbr_uid++)),
5634 /* ??? We would like this to have the scope of the jump, but that
5635 scope will change when a delay slot insn of an inner scope is added.
5636 Hence, after delay slot scheduling, we'll have to expect
	 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
	 the jump.  */
5640 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5641 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5644 else if (need_block)
5645 /* We can't use JUMP_LABEL here because it might be undefined
5646 when not optimizing. */
5647 return emit_insn_before (gen_block_branch_redirect
5648 (GEN_INT (unspec_bbr_uid++)),
5653 #define CONDJUMP_MIN -252
5654 #define CONDJUMP_MAX 262
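/* Our gloss, not from the original sources: SH conditional branches
   (bt/bf) encode an 8-bit displacement scaled by 2 and counted from
   PC + 4, giving a reach of roughly -252..+258 bytes; the bounds above
   include a little slack.  A conditional branch that cannot reach its
   target is rewritten along the lines of

       bf   .Lfar    ==>    bt   .Lskip
                            bra  .Lfar
                            nop
                          .Lskip:
*/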
struct far_branch
{
  /* A label (to be placed) in front of the jump
     that jumps to our ultimate destination.  */
  rtx near_label;
  /* Where we are going to insert it if we cannot move the jump any farther,
     or the jump itself if we have picked up an existing jump.  */
  rtx insert_place;
  /* The ultimate destination.  */
  rtx far_label;
  struct far_branch *prev;
  /* If the branch has already been created, its address;
     else the address of its first prospective user.  */
  int address;
};
5671 static void gen_far_branch (struct far_branch *);
5672 enum mdep_reorg_phase_e mdep_reorg_phase;
static void
gen_far_branch (struct far_branch *bp)
{
5676 rtx insn = bp->insert_place;
5678 rtx label = gen_label_rtx ();
5681 emit_label_after (label, insn);
5684 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5685 LABEL_NUSES (bp->far_label)++;
5688 jump = emit_jump_insn_after (gen_return (), insn);
5690 /* Emit a barrier so that reorg knows that any following instructions
5691 are not reachable via a fall-through path.
5692 But don't do this when not optimizing, since we wouldn't suppress the
5693 alignment for the barrier then, and could end up with out-of-range
5694 pc-relative loads. */
5696 emit_barrier_after (jump);
5697 emit_label_after (bp->near_label, insn);
5700 JUMP_LABEL (jump) = bp->far_label;
5703 rtx pat = PATTERN (jump);
5704 gcc_assert (ANY_RETURN_P (pat));
5705 JUMP_LABEL (jump) = pat;
5708 ok = invert_jump (insn, label, 1);
5711 /* If we are branching around a jump (rather than a return), prevent
5712 reorg from using an insn from the jump target as the delay slot insn -
5713 when reorg did this, it pessimized code (we rather hide the delay slot)
5714 and it could cause branches to go out of range. */
5717 (gen_stuff_delay_slot
5718 (GEN_INT (unspec_bbr_uid++),
5719 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5721 /* Prevent reorg from undoing our splits. */
5722 gen_block_redirect (jump, bp->address += 2, 2);
5725 /* Fix up ADDR_DIFF_VECs. */
static void
fixup_addr_diff_vecs (rtx first)
{
  rtx insn;
5731 for (insn = first; insn; insn = NEXT_INSN (insn))
5733 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5735 if (! JUMP_TABLE_DATA_P (insn)
5736 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5738 pat = PATTERN (insn);
5739 vec_lab = XEXP (XEXP (pat, 0), 0);
5741 /* Search the matching casesi_jump_2. */
5742 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5746 prevpat = PATTERN (prev);
5747 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5749 x = XVECEXP (prevpat, 0, 1);
5750 if (GET_CODE (x) != USE)
5753 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
	/* FIXME: This is a bug in the optimizer, but it seems harmless
	   to just avoid panicking.  */
5761 /* Emit the reference label of the braf where it belongs, right after
5762 the casesi_jump_2 (i.e. braf). */
5763 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5764 emit_label_after (braf_label, prev);
5766 /* Fix up the ADDR_DIF_VEC to be relative
5767 to the reference address of the braf. */
5768 XEXP (XEXP (pat, 0), 0) = braf_label;
5772 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5773 a barrier. Return the base 2 logarithm of the desired alignment. */
int
barrier_align (rtx barrier_or_label)
{
  rtx next, pat;
5779 if (LABEL_P (barrier_or_label)
5780 && NEXT_INSN (barrier_or_label)
5781 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5784 if (BARRIER_P (barrier_or_label)
5785 && PREV_INSN (barrier_or_label)
5786 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5788 pat = PATTERN (PREV_INSN (barrier_or_label));
5789 /* If this is a very small table, we want to keep the alignment after
5790 the table to the minimum for proper code alignment. */
5791 return ((optimize_size
5792 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5793 <= (unsigned) 1 << (CACHE_LOG - 2)))
5794 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5797 next = next_active_insn (barrier_or_label);
5802 pat = PATTERN (next);
5804 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5805 /* This is a barrier in front of a constant table. */
5811 if (! TARGET_SH2 || ! optimize)
5812 return align_jumps_log;
5814 /* When fixing up pcloads, a constant table might be inserted just before
5815 the basic block that ends with the barrier. Thus, we can't trust the
5816 instruction lengths before that. */
5817 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5819 /* Check if there is an immediately preceding branch to the insn beyond
5820 the barrier. We must weight the cost of discarding useful information
5821 from the current cache line when executing this branch and there is
5822 an alignment, against that of fetching unneeded insn in front of the
5823 branch target when there is no alignment. */
5825 /* There are two delay_slot cases to consider. One is the simple case
5826 where the preceding branch is to the insn beyond the barrier (simple
5827 delay slot filling), and the other is where the preceding branch has
5828 a delay slot that is a duplicate of the insn after the barrier
5829 (fill_eager_delay_slots) and the branch is to the insn after the insn
5830 after the barrier. */
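  /* Illustrative sketch of the two cases (ours, not from the original
     sources), where I1 is the insn after the barrier and I2 the insn
     after I1:

       simple delay slot filling:      fill_eager_delay_slots:

	 bra   .L1                       bra   .L2
	 nop                             I1'      ! duplicate of I1
	 barrier                         barrier
       .L1:                            .L1:
	 I1                              I1
				       .L2:
					 I2
  */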
      int slot, credit;
      bool jump_to_next = false;
      /* Skip to the insn before the JUMP_INSN before the barrier under
	 investigation.  */
5837 rtx prev = prev_real_insn (prev_active_insn (barrier_or_label));
5839 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5840 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5841 prev = prev_real_insn (prev))
5843 jump_to_next = false;
5844 if (GET_CODE (PATTERN (prev)) == USE
5845 || GET_CODE (PATTERN (prev)) == CLOBBER)
5847 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5849 prev = XVECEXP (PATTERN (prev), 0, 1);
5850 if (INSN_UID (prev) == INSN_UID (next))
5852 /* Delay slot was filled with insn at jump target. */
5853 jump_to_next = true;
5859 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5861 credit -= get_attr_length (prev);
5863 if (prev && jump_to_label_p (prev))
5867 || next_real_insn (JUMP_LABEL (prev)) == next
5868 /* If relax_delay_slots() decides NEXT was redundant
5869 with some previous instruction, it will have
5870 redirected PREV's jump to the following insn. */
5871 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5872 /* There is no upper bound on redundant instructions
5873 that might have been skipped, but we must not put an
5874 alignment where none had been before. */
5875 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5877 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5878 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5879 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5881 rtx pat = PATTERN (prev);
5882 if (GET_CODE (pat) == PARALLEL)
5883 pat = XVECEXP (pat, 0, 0);
5884 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5890 return align_jumps_log;
5893 /* If we are inside a phony loop, almost any kind of label can turn up as the
5894 first one in the loop. Aligning a braf label causes incorrect switch
5895 destination addresses; we can detect braf labels because they are
5896 followed by a BARRIER.
5897 Applying loop alignment to small constant or switch tables is a waste
5898 of space, so we suppress this too. */
int
sh_loop_align (rtx label)
{
  rtx next = label;
5904 if (! optimize || optimize_size)
5908 next = next_nonnote_insn (next);
5909 while (next && LABEL_P (next));
5913 || recog_memoized (next) == CODE_FOR_consttable_2)
5916 return align_loops_log;
/* Do a final pass over the function, just before delayed branch
   scheduling.  */
static void
sh_reorg (void)
{
  rtx first, insn, mova = NULL_RTX;
  int num_mova;
5926 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5927 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5929 first = get_insns ();
5930 max_labelno_before_reorg = max_label_num ();
5932 /* We must split call insns before introducing `mova's. If we're
5933 optimizing, they'll have already been split. Otherwise, make
5934 sure we don't split them too late. */
5936 split_all_insns_noflow ();
5941 /* If relaxing, generate pseudo-ops to associate function calls with
5942 the symbols they call. It does no harm to not generate these
5943 pseudo-ops. However, when we can generate them, it enables the
5944 linker to potentially relax the jsr to a bsr, and eliminate the
5945 register load and, possibly, the constant pool entry. */
5947 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5950 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5951 own purposes. This works because none of the remaining passes
5952 need to look at them.
5954 ??? But it may break in the future. We should use a machine
5955 dependent REG_NOTE, or some other approach entirely. */
5956 for (insn = first; insn; insn = NEXT_INSN (insn))
5962 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5964 remove_note (insn, note);
5968 for (insn = first; insn; insn = NEXT_INSN (insn))
5970 rtx pattern, reg, link, set, scan, dies, label;
5971 int rescan = 0, foundinsn = 0;
5975 pattern = PATTERN (insn);
5977 if (GET_CODE (pattern) == PARALLEL)
5978 pattern = XVECEXP (pattern, 0, 0);
5979 if (GET_CODE (pattern) == SET)
5980 pattern = SET_SRC (pattern);
5982 if (GET_CODE (pattern) != CALL
5983 || !MEM_P (XEXP (pattern, 0)))
5986 reg = XEXP (XEXP (pattern, 0), 0);
5990 reg = sfunc_uses_reg (insn);
5998 /* Try scanning backward to find where the register is set. */
6000 for (scan = PREV_INSN (insn);
6001 scan && !LABEL_P (scan);
6002 scan = PREV_INSN (scan))
6004 if (! INSN_P (scan))
6007 if (! reg_mentioned_p (reg, scan))
6010 if (noncall_uses_reg (reg, scan, &set))
6023 /* The register is set at LINK. */
6025 /* We can only optimize the function call if the register is
6026 being set to a symbol. In theory, we could sometimes
6027 optimize calls to a constant location, but the assembler
6028 and linker do not support that at present. */
6029 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6030 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6033 /* Scan forward from LINK to the place where REG dies, and
6034 make sure that the only insns which use REG are
6035 themselves function calls. */
6037 /* ??? This doesn't work for call targets that were allocated
	     by reload, since there may not be a REG_DEAD note for the
	     register.  */

	  dies = NULL_RTX;
6042 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6046 /* Don't try to trace forward past a CODE_LABEL if we haven't
6047 seen INSN yet. Ordinarily, we will only find the setting insn
6048 if it is in the same basic block. However,
6049 cross-jumping can insert code labels in between the load and
6050 the call, and can result in situations where a single call
6051 insn may have two targets depending on where we came from. */
6053 if (LABEL_P (scan) && ! foundinsn)
6056 if (! INSN_P (scan))
6059 /* Don't try to trace forward past a JUMP. To optimize
6060 safely, we would have to check that all the
6061 instructions at the jump destination did not use REG. */
6066 if (! reg_mentioned_p (reg, scan))
6069 if (noncall_uses_reg (reg, scan, &scanset))
6076 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6078 /* There is a function call to this register other
6079 than the one we are checking. If we optimize
6080 this call, we need to rescan again below. */
6084 /* ??? We shouldn't have to worry about SCANSET here.
6085 We should just be able to check for a REG_DEAD note
6086 on a function call. However, the REG_DEAD notes are
6087 apparently not dependable around libcalls; c-torture
6088 execute/920501-2 is a test case. If SCANSET is set,
6089 then this insn sets the register, so it must have
6090 died earlier. Unfortunately, this will only handle
		     the cases in which the register is, in fact, set in a
		     later insn.  */
6094 /* ??? We shouldn't have to use FOUNDINSN here.
6095 This dates back to when we used LOG_LINKS to find
6096 the most recent insn which sets the register. */
6100 || find_reg_note (scan, REG_DEAD, reg)))
6109 /* Either there was a branch, or some insn used REG
6110 other than as a function call address. */
6114 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6115 on the insn which sets the register, and on each call insn
6116 which uses the register. In final_prescan_insn we look for
	     the REG_LABEL_OPERAND notes, and output the appropriate label
	     or pseudo-op.  */
6120 label = gen_label_rtx ();
6121 add_reg_note (link, REG_LABEL_OPERAND, label);
6122 add_reg_note (insn, REG_LABEL_OPERAND, label);
6130 scan = NEXT_INSN (scan);
6133 && reg_mentioned_p (reg, scan))
6134 || ((reg2 = sfunc_uses_reg (scan))
6135 && REGNO (reg2) == REGNO (reg))))
6136 add_reg_note (scan, REG_LABEL_OPERAND, label);
6138 while (scan != dies);
6144 fixup_addr_diff_vecs (first);
6148 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6149 shorten_branches (first);
6152 /* Scan the function looking for move instructions which have to be
6153 changed to pc-relative loads and insert the literal tables. */
6154 label_ref_list_pool = create_alloc_pool ("label references list",
6155 sizeof (struct label_ref_list_d),
6157 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6158 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6162 /* ??? basic block reordering can move a switch table dispatch
6163 below the switch table. Check if that has happened.
6164 We only have the addresses available when optimizing; but then,
6165 this check shouldn't be needed when not optimizing. */
6166 if (!untangle_mova (&num_mova, &mova, insn))
6172 else if (JUMP_TABLE_DATA_P (insn)
6173 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6175 /* ??? loop invariant motion can also move a mova out of a
6176 loop. Since loop does this code motion anyway, maybe we
	      should wrap UNSPEC_MOVA into a CONST, so that reload can
	      fix it up.  */
6180 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6181 || (prev_nonnote_insn (insn)
6182 == XEXP (MOVA_LABELREF (mova), 0))))
6189 /* Some code might have been inserted between the mova and
6190 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6191 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6192 total += get_attr_length (scan);
6194 /* range of mova is 1020, add 4 because pc counts from address of
6195 second instruction after this one, subtract 2 in case pc is 2
6196 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6197 cancels out with alignment effects of the mova itself. */
6200 /* Change the mova into a load, and restart scanning
6201 there. broken_move will then return true for mova. */
6206 if (broken_move (insn)
6207 || (NONJUMP_INSN_P (insn)
6208 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
	  /* Scan ahead looking for a barrier to stick the constant table
	     behind.  */
6213 rtx barrier = find_barrier (num_mova, mova, insn);
6214 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
6215 int need_aligned_label = 0;
6217 if (num_mova && ! mova_p (mova))
6219 /* find_barrier had to change the first mova into a
6220 pcload; thus, we have to start with this new pcload. */
6224 /* Now find all the moves between the points and modify them. */
6225 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6229 if (NONJUMP_INSN_P (scan)
6230 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6231 need_aligned_label = 1;
6232 if (broken_move (scan))
6234 rtx *patp = &PATTERN (scan), pat = *patp;
6238 enum machine_mode mode;
6240 if (GET_CODE (pat) == PARALLEL)
6241 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6242 src = SET_SRC (pat);
6243 dst = SET_DEST (pat);
6244 mode = GET_MODE (dst);
6246 if (mode == SImode && satisfies_constraint_I16 (src)
6247 && REGNO (dst) != FPUL_REG)
6252 while (GET_CODE (dst) == SUBREG)
6254 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6255 GET_MODE (SUBREG_REG (dst)),
6258 dst = SUBREG_REG (dst);
6260 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6262 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6264 /* This must be an insn that clobbers r0. */
6265 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6266 XVECLEN (PATTERN (scan), 0)
6268 rtx clobber = *clobberp;
6270 gcc_assert (GET_CODE (clobber) == CLOBBER
6271 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6274 && reg_set_between_p (r0_rtx, last_float_move, scan))
6278 && GET_MODE_SIZE (mode) != 4
6279 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6281 lab = add_constant (src, mode, last_float);
6283 emit_insn_before (gen_mova (lab), scan);
6286 /* There will be a REG_UNUSED note for r0 on
6287 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6288 lest reorg:mark_target_live_regs will not
		     consider r0 to be used, and we end up with a delay
6290 slot insn in front of SCAN that clobbers r0. */
6292 = find_regno_note (last_float_move, REG_UNUSED, 0);
		  /* If we are not optimizing, then there may not be
		     a note.  */
		  if (note)
6297 PUT_REG_NOTE_KIND (note, REG_INC);
6299 *last_float_addr = r0_inc_rtx;
6301 last_float_move = scan;
6303 newsrc = gen_const_mem (mode,
6304 (((TARGET_SH4 && ! TARGET_FMOVD)
6305 || REGNO (dst) == FPUL_REG)
6308 last_float_addr = &XEXP (newsrc, 0);
6310 /* Remove the clobber of r0. */
6311 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6312 gen_rtx_SCRATCH (Pmode));
6314 /* This is a mova needing a label. Create it. */
6315 else if (GET_CODE (src) == UNSPEC
6316 && XINT (src, 1) == UNSPEC_MOVA
6317 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6319 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6320 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6321 newsrc = gen_rtx_UNSPEC (SImode,
6322 gen_rtvec (1, newsrc),
6325 else if (GET_CODE (src) == UNSPEC_VOLATILE
6326 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6328 newsrc = XVECEXP (src, 0, 0);
6329 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6330 INSN_CODE (scan) = -1;
6335 lab = add_constant (src, mode, 0);
6336 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6337 newsrc = gen_const_mem (mode, newsrc);
6339 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6340 INSN_CODE (scan) = -1;
6343 dump_table (need_aligned_label ? insn : 0, barrier);
6347 free_alloc_pool (label_ref_list_pool);
6348 for (insn = first; insn; insn = NEXT_INSN (insn))
6349 PUT_MODE (insn, VOIDmode);
6351 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6352 INSN_ADDRESSES_FREE ();
6353 split_branches (first);
6355 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6356 also has an effect on the register that holds the address of the sfunc.
6357 Insert an extra dummy insn in front of each sfunc that pretends to
6358 use this register. */
6359 if (flag_delayed_branch)
6361 for (insn = first; insn; insn = NEXT_INSN (insn))
6363 rtx reg = sfunc_uses_reg (insn);
6367 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6371 /* fpscr is not actually a user variable, but we pretend it is for the
6372 sake of the previous optimization passes, since we want it handled like
6373 one. However, we don't have any debugging information for it, so turn
6374 it into a non-user variable now. */
6376 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6378 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6381 /* Return the UID of the insn that follows the specified label. */
static int
get_dest_uid (rtx label, int max_uid)
{
  rtx dest = next_real_insn (label);
  int dest_uid;

  if (! dest)
    /* This can happen for an undefined label.  */
    return 0;
6390 dest_uid = INSN_UID (dest);
6391 /* If this is a newly created branch redirection blocking instruction,
6392 we cannot index the branch_uid or insn_addresses arrays with its
6393 uid. But then, we won't need to, because the actual destination is
6394 the following branch. */
6395 while (dest_uid >= max_uid)
6397 dest = NEXT_INSN (dest);
6398 dest_uid = INSN_UID (dest);
6400 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6405 /* Split condbranches that are out of range. Also add clobbers for
6406 scratch registers that are needed in far jumps.
6407 We do this before delay slot scheduling, so that it can take our
6408 newly created instructions into account. It also allows us to
6409 find branches with common targets more easily. */
static void
split_branches (rtx first)
{
  rtx insn;
6414 struct far_branch **uid_branch, *far_branch_list = 0;
6415 int max_uid = get_max_uid ();
6418 /* Find out which branches are out of range. */
6419 shorten_branches (first);
6421 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6422 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6424 for (insn = first; insn; insn = NEXT_INSN (insn))
6425 if (! INSN_P (insn))
6427 else if (INSN_DELETED_P (insn))
6429 /* Shorten_branches would split this instruction again,
6430 so transform it into a note. */
6431 SET_INSN_DELETED (insn);
6433 else if (JUMP_P (insn))
6435 enum attr_type type = get_attr_type (insn);
6436 if (type == TYPE_CBRANCH)
6440 if (get_attr_length (insn) > 4)
6442 rtx src = SET_SRC (PATTERN (insn));
6443 rtx olabel = XEXP (XEXP (src, 1), 0);
6444 int addr = INSN_ADDRESSES (INSN_UID (insn));
6446 int dest_uid = get_dest_uid (olabel, max_uid);
6447 struct far_branch *bp = uid_branch[dest_uid];
6449 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6450 the label if the LABEL_NUSES count drops to zero. There is
6451 always a jump_optimize pass that sets these values, but it
6452 proceeds to delete unreferenced code, and then if not
6453 optimizing, to un-delete the deleted instructions, thus
	     leaving labels with too low use counts.  */
6457 JUMP_LABEL (insn) = olabel;
6458 LABEL_NUSES (olabel)++;
6462 bp = (struct far_branch *) alloca (sizeof *bp);
6463 uid_branch[dest_uid] = bp;
6464 bp->prev = far_branch_list;
6465 far_branch_list = bp;
6467 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6468 LABEL_NUSES (bp->far_label)++;
6472 label = bp->near_label;
6473 if (! label && bp->address - addr >= CONDJUMP_MIN)
6475 rtx block = bp->insert_place;
6477 if (GET_CODE (PATTERN (block)) == RETURN)
6478 block = PREV_INSN (block);
6480 block = gen_block_redirect (block,
6482 label = emit_label_after (gen_label_rtx (),
6484 bp->near_label = label;
6486 else if (label && ! NEXT_INSN (label))
6488 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6489 bp->insert_place = insn;
6491 gen_far_branch (bp);
6495 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6497 bp->near_label = label = gen_label_rtx ();
6498 bp->insert_place = insn;
6501 ok = redirect_jump (insn, label, 0);
6506 /* get_attr_length (insn) == 2 */
6507 /* Check if we have a pattern where reorg wants to redirect
6508 the branch to a label from an unconditional branch that is too far away. */
6510 /* We can't use JUMP_LABEL here because it might be undefined
6511 when not optimizing. */
6512 /* A syntax error might cause beyond to be NULL_RTX. */
6514 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6519 || ((beyond = next_active_insn (beyond))
6520 && JUMP_P (beyond)))
6521 && GET_CODE (PATTERN (beyond)) == SET
6522 && recog_memoized (beyond) == CODE_FOR_jump_compact
6524 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6525 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6527 gen_block_redirect (beyond,
6528 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6531 next = next_active_insn (insn);
6535 || ((next = next_active_insn (next))
6537 && GET_CODE (PATTERN (next)) == SET
6538 && recog_memoized (next) == CODE_FOR_jump_compact
6540 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6541 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6543 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6545 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6547 int addr = INSN_ADDRESSES (INSN_UID (insn));
6550 struct far_branch *bp;
6552 if (type == TYPE_JUMP)
6554 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6555 dest_uid = get_dest_uid (far_label, max_uid);
6558 /* Parse errors can lead to labels outside the insn stream. */
6560 if (! NEXT_INSN (far_label))
6565 JUMP_LABEL (insn) = far_label;
6566 LABEL_NUSES (far_label)++;
6568 redirect_jump (insn, ret_rtx, 1);
6572 bp = uid_branch[dest_uid];
6575 bp = (struct far_branch *) alloca (sizeof *bp);
6576 uid_branch[dest_uid] = bp;
6577 bp->prev = far_branch_list;
6578 far_branch_list = bp;
6580 bp->far_label = far_label;
6582 LABEL_NUSES (far_label)++;
6584 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6585 if (addr - bp->address <= CONDJUMP_MAX)
6586 emit_label_after (bp->near_label, PREV_INSN (insn));
6589 gen_far_branch (bp);
6595 bp->insert_place = insn;
6597 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6599 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6602 /* Generate all pending far branches,
6603 and free our references to the far labels. */
6604 while (far_branch_list)
6606 if (far_branch_list->near_label
6607 && ! NEXT_INSN (far_branch_list->near_label))
6608 gen_far_branch (far_branch_list);
6610 && far_branch_list->far_label
6611 && ! --LABEL_NUSES (far_branch_list->far_label))
6612 delete_insn (far_branch_list->far_label);
6613 far_branch_list = far_branch_list->prev;
6616 /* Instruction length information is no longer valid due to the new
6617 instructions that have been generated. */
6618 init_insn_lengths ();
6621 /* Dump out instruction addresses, which is useful for debugging the
6622 constant pool table stuff.
6624 If relaxing, output the label and pseudo-ops used to link together
6625 calls and the instruction which set the registers.
6627 ??? The addresses printed by this routine for insns are nonsense for
6628 insns which are inside of a sequence where none of the inner insns have
6629 variable length. This is because the second pass of shorten_branches
6630 does not bother to update them. */
6632 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6633 int noperands ATTRIBUTE_UNUSED)
6635 if (TARGET_DUMPISIZE)
6636 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
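      /* Illustrative output when TARGET_DUMPISIZE is enabled: a comment
	 line such as "! at 004c" precedes each insn in the assembly
	 (the address value here is made up).  */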
6642 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6647 pattern = PATTERN (insn);
6648 if (GET_CODE (pattern) == PARALLEL)
6649 pattern = XVECEXP (pattern, 0, 0);
6650 switch (GET_CODE (pattern))
6653 if (GET_CODE (SET_SRC (pattern)) != CALL
6654 && get_attr_type (insn) != TYPE_SFUNC)
6656 targetm.asm_out.internal_label
6657 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6660 /* else FALLTHROUGH */
6662 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6663 CODE_LABEL_NUMBER (XEXP (note, 0)));
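	  /* Illustrative relaxation output, assuming an ELF-style "."
	     LOCAL_LABEL_PREFIX for %L (label number made up):
		  .uses	.L42
	     This is the pseudo-op that links a call to the insn which set
	     up its address register, as described above.  */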
6673 /* Dump out any constants accumulated in the final pass. These will only be labels. */
6676 output_jump_label_table (void)
6682 fprintf (asm_out_file, "\t.align 2\n");
6683 for (i = 0; i < pool_size; i++)
6685 pool_node *p = &pool_vector[i];
6687 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6688 CODE_LABEL_NUMBER (p->label));
6689 output_asm_insn (".long %O0", &p->value);
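      /* Illustrative output for one pool entry (label number and value
	 are made up):
	      .align 2
	 .L907:
	      .long	_some_symbol  */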
6697 /* A full frame looks like:
6701 [ if current_function_anonymous_args
6714 local-0 <- fp points here.
6716 Number of bytes pushed for anonymous args, used to pass information
6717 between expand_prologue and expand_epilogue.
6719 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6720 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6721 for an epilogue and a negative value means that it's for a sibcall
6722 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6723 all the registers that are about to be restored, and hence dead. */
6725 output_stack_adjust (int size, rtx reg, int epilogue_p,
6726 HARD_REG_SET *live_regs_mask, bool frame_p)
6728 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6731 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6733 /* This test is bogus, as output_stack_adjust is used to re-align the stack. */
6736 gcc_assert (!(size % align));
6739 if (CONST_OK_FOR_ADD (size))
6740 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6741 /* Try to do it with two partial adjustments; however, we must make
6742 sure that the stack is properly aligned at all times, in case
6743 an interrupt occurs between the two partial adjustments. */
6744 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6745 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6747 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6748 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
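      /* Worked example, assuming align == 4 and an 8-bit signed
	 add-immediate range of -128..127: size == 200 does not fit a
	 single add, but 200/2 & -4 == 100 and 200 - 100 == 100 both do,
	 and the stack stays 4-byte aligned between the two adds.  */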
6754 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6757 /* If TEMP is invalid, we could temporarily save a general
6758 register to MACL. However, there is currently no need
6759 to handle this case, so just die when we see it. */
6761 || current_function_interrupt
6762 || ! call_really_used_regs[temp] || fixed_regs[temp])
6764 if (temp < 0 && ! current_function_interrupt
6765 && (TARGET_SHMEDIA || epilogue_p >= 0))
6768 COPY_HARD_REG_SET (temps, call_used_reg_set);
6769 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6773 if (crtl->return_rtx)
6775 enum machine_mode mode;
6776 mode = GET_MODE (crtl->return_rtx);
6777 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6778 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6780 for (i = 0; i < nreg; i++)
6781 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6782 if (crtl->calls_eh_return)
6784 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6785 for (i = 0; i <= 3; i++)
6786 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6789 if (TARGET_SHMEDIA && epilogue_p < 0)
6790 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6791 CLEAR_HARD_REG_BIT (temps, i);
6792 if (epilogue_p <= 0)
6794 for (i = FIRST_PARM_REG;
6795 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6796 CLEAR_HARD_REG_BIT (temps, i);
6797 if (cfun->static_chain_decl != NULL)
6798 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6800 temp = scavenge_reg (&temps);
6802 if (temp < 0 && live_regs_mask)
6806 COPY_HARD_REG_SET (temps, *live_regs_mask);
6807 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6808 temp = scavenge_reg (&temps);
6812 rtx adj_reg, tmp_reg, mem;
6814 /* If we reached here, the most likely case is the (sibcall)
6815 epilogue for non-SHmedia. Put a special push/pop sequence
6816 for such a case as the last resort. This looks lengthy but
6817 would not be a problem because it seems to be very rare. */
6820 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6823 /* ??? There is still the slight possibility that r4 or
6824 r5 have been reserved as fixed registers or assigned
6825 as global registers, and they change during an
6826 interrupt. There are possible ways to handle this:
6828 - If we are adjusting the frame pointer (r14), we can do
6829 with a single temp register and an ordinary push / pop
6831 - Grab any call-used or call-saved registers (i.e. not
6832 fixed or globals) for the temps we need. We might
6833 also grab r14 if we are adjusting the stack pointer.
6834 If we can't find enough available registers, issue
6835 a diagnostic and die - the user must have reserved
6836 way too many registers.
6837 But since all this is rather unlikely to happen and
6838 would require extra testing, we just die if r4 / r5
6839 are not available. */
6840 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6841 && !global_regs[4] && !global_regs[5]);
6843 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6844 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6845 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6846 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6847 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6848 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6849 emit_move_insn (mem, tmp_reg);
6850 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6851 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6852 emit_move_insn (mem, tmp_reg);
6853 emit_move_insn (reg, adj_reg);
6854 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6855 emit_move_insn (adj_reg, mem);
6856 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6857 emit_move_insn (tmp_reg, mem);
6858 /* Tell flow the insns that pop r4/r5 aren't dead. */
6863 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6865 /* If SIZE is negative, subtract the positive value.
6866 This sometimes allows a constant pool entry to be shared
6867 between prologue and epilogue code. */
6870 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6871 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6875 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6876 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
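	  /* E.g. for a 1024-byte frame the prologue loads 1024 and
	     subtracts while the epilogue loads 1024 and adds, so both
	     moves can share a single constant pool entry for 1024.  */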
6878 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6879 gen_rtx_SET (VOIDmode, reg,
6880 gen_rtx_PLUS (SImode, reg,
6886 /* Emit the specified insn and mark it as frame related.
6887 FIXME: Rename this to emit_frame_insn. */
6892 RTX_FRAME_RELATED_P (x) = 1;
6896 /* Output RTL to push register RN onto the stack. */
6902 x = gen_push_fpul ();
6903 else if (rn == FPSCR_REG)
6904 x = gen_push_fpscr ();
6905 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6906 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6908 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6910 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6912 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6913 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6915 x = gen_push (gen_rtx_REG (SImode, rn));
6918 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6922 /* Output RTL to pop register RN from the stack. */
6928 x = gen_pop_fpul ();
6929 else if (rn == FPSCR_REG)
6930 x = gen_pop_fpscr ();
6931 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6932 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6934 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6936 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6938 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6939 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6941 x = gen_pop (gen_rtx_REG (SImode, rn));
6945 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6946 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6947 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6948 : SET_DEST (PATTERN (x)));
6949 add_reg_note (x, REG_CFA_RESTORE, reg);
6950 add_reg_note (x, REG_CFA_ADJUST_CFA,
6951 gen_rtx_SET (SImode, sp_reg,
6952 plus_constant (SImode, sp_reg,
6953 GET_MODE_SIZE (GET_MODE (reg)))));
6954 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6955 RTX_FRAME_RELATED_P (x) = 1;
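  /* E.g. popping an SImode register records REG_CFA_ADJUST_CFA as
     sp = sp + 4, since GET_MODE_SIZE (SImode) == 4.  */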
6958 /* Generate code to push the regs specified in the mask. */
6960 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6962 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6965 /* Push PR last; this gives better latencies after the prologue, and
6966 candidates for the return delay slot when there are no general
6967 registers pushed. */
6968 for (; i < FIRST_PSEUDO_REGISTER; i++)
6970 /* If this is an interrupt handler, and the SZ bit varies,
6971 and we have to push any floating point register, we need
6972 to switch to the correct precision first. */
6973 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6974 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6976 HARD_REG_SET unsaved;
6979 COMPL_HARD_REG_SET (unsaved, *mask);
6980 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6984 && (i != FPSCR_REG || ! skip_fpscr)
6985 && TEST_HARD_REG_BIT (*mask, i))
6987 /* If the ISR has RESBANK attribute assigned, don't push any of
6988 the following registers: R0-R14, MACH, MACL and GBR. */
6989 if (! (sh_cfun_resbank_handler_p ()
6990 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6998 /* Push banked registers last to improve delay slot opportunities. */
6999 if (interrupt_handler)
7001 bool use_movml = false;
7005 unsigned int count = 0;
7007 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7008 if (TEST_HARD_REG_BIT (*mask, i))
7013 /* Use movml when all banked registers are pushed. */
7014 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7018 if (sh_cfun_resbank_handler_p ())
7022 rtx x, mem, reg, set;
7023 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7025 /* We must avoid scheduling multiple store insns with other insns. */
7027 emit_insn (gen_blockage ());
7028 x = gen_movml_push_banked (sp_reg);
7030 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7032 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7033 reg = gen_rtx_REG (SImode, i);
7034 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7037 set = gen_rtx_SET (SImode, sp_reg,
7038 plus_constant (Pmode, sp_reg, - 32));
7039 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7040 emit_insn (gen_blockage ());
7043 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7044 if (TEST_HARD_REG_BIT (*mask, i))
7048 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7049 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7053 /* Calculate how much extra space is needed to save all callee-saved target registers.
7055 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7057 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7060 int stack_space = 0;
7061 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7063 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7064 if ((! call_really_used_regs[reg] || interrupt_handler)
7065 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7066 /* Leave space to save this target register on the stack,
7067 in case target register allocation wants to use it. */
7068 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7072 /* Decide whether we should reserve space for callee-save target registers,
7073 in case target register allocation wants to use them. REGS_SAVED is
7074 the space, in bytes, that is already required for register saves.
7075 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7077 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7078 HARD_REG_SET *live_regs_mask)
7082 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7085 /* Decide how much space to reserve for callee-save target registers
7086 in case target register allocation wants to use them.
7087 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7089 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7091 if (shmedia_space_reserved_for_target_registers)
7092 return shmedia_target_regs_stack_space (live_regs_mask);
7097 /* Work out the registers which need to be saved, both as a mask and a
7098 count of saved words. Return the count.
7100 If doing a pragma interrupt function, then push all regs used by the
7101 function, and if we call another function (we can tell by looking at PR),
7102 make sure that all the regs it clobbers are safe too. */
7104 calc_live_regs (HARD_REG_SET *live_regs_mask)
7109 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7110 bool nosave_low_regs;
7111 int pr_live, has_call;
7113 attrs = DECL_ATTRIBUTES (current_function_decl);
7114 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7115 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7116 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7117 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7119 CLEAR_HARD_REG_SET (*live_regs_mask);
7120 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7121 && df_regs_ever_live_p (FPSCR_REG))
7122 target_flags &= ~MASK_FPU_SINGLE;
7123 /* If switching to double mode avoids a lot of register saves, do that. */
7124 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7125 && TARGET_FPU_SINGLE)
7126 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7127 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7128 && (! call_really_used_regs[reg]
7129 || interrupt_handler)
7132 target_flags &= ~MASK_FPU_SINGLE;
7135 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7136 knows how to use it. That means the pseudo originally allocated for
7137 the initial value can become the PR_MEDIA_REG hard register, as seen for
7138 execute/20010122-1.c:test9. */
7140 /* ??? This function is called from initial_elimination_offset, hence we
7141 can't use the result of sh_media_register_for_return here. */
7142 pr_live = sh_pr_n_sets ();
7145 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7146 pr_live = (pr_initial
7147 ? (!REG_P (pr_initial)
7148 || REGNO (pr_initial) != (PR_REG))
7149 : df_regs_ever_live_p (PR_REG));
7150 /* For SHcompact, if not optimizing, we end up with a memory reference
7151 using the return address pointer for __builtin_return_address even
7152 though there is no actual need to put the PR register on the stack. */
7153 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7155 /* Force PR to be live if the prologue has to call the SHmedia
7156 argument decoder or register saver. */
7157 if (TARGET_SHCOMPACT
7158 && ((crtl->args.info.call_cookie
7159 & ~ CALL_COOKIE_RET_TRAMP (1))
7160 || crtl->saves_all_registers))
7162 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7163 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7165 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7168 ? (/* Need to save all the regs ever live. */
7169 (df_regs_ever_live_p (reg)
7170 || (call_really_used_regs[reg]
7171 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7172 || reg == PIC_OFFSET_TABLE_REGNUM)
7174 || (TARGET_SHMEDIA && has_call
7175 && REGISTER_NATURAL_MODE (reg) == SImode
7176 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7177 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7178 && reg != RETURN_ADDRESS_POINTER_REGNUM
7179 && reg != T_REG && reg != GBR_REG
7180 /* Push fpscr only on targets which have an FPU. */
7181 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7182 : (/* Only push those regs which are used and need to be saved. */
7185 && crtl->args.info.call_cookie
7186 && reg == PIC_OFFSET_TABLE_REGNUM)
7187 || (df_regs_ever_live_p (reg)
7188 && ((!call_really_used_regs[reg]
7189 && !(reg != PIC_OFFSET_TABLE_REGNUM
7190 && fixed_regs[reg] && call_used_regs[reg]))
7191 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7192 || (crtl->calls_eh_return
7193 && (reg == EH_RETURN_DATA_REGNO (0)
7194 || reg == EH_RETURN_DATA_REGNO (1)
7195 || reg == EH_RETURN_DATA_REGNO (2)
7196 || reg == EH_RETURN_DATA_REGNO (3)))
7197 || ((reg == MACL_REG || reg == MACH_REG)
7198 && df_regs_ever_live_p (reg)
7199 && sh_cfun_attr_renesas_p ())
7202 SET_HARD_REG_BIT (*live_regs_mask, reg);
7203 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7205 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7206 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7208 if (FP_REGISTER_P (reg))
7210 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7212 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7213 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7216 else if (XD_REGISTER_P (reg))
7218 /* Must switch to double mode to access these registers. */
7219 target_flags &= ~MASK_FPU_SINGLE;
7223 if (nosave_low_regs && reg == R8_REG)
7226 /* If we have a target register optimization pass after prologue / epilogue
7227 threading, we need to assume all target registers will be live even if they aren't now. */
7229 if (flag_branch_target_load_optimize2
7230 && TARGET_SAVE_ALL_TARGET_REGS
7231 && shmedia_space_reserved_for_target_registers)
7232 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7233 if ((! call_really_used_regs[reg] || interrupt_handler)
7234 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7236 SET_HARD_REG_BIT (*live_regs_mask, reg);
7237 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7239 /* If this is an interrupt handler, we don't have any call-clobbered
7240 registers we can conveniently use for target register save/restore.
7241 Make sure we save at least one general purpose register when we need
7242 to save target registers. */
7243 if (interrupt_handler
7244 && hard_reg_set_intersect_p (*live_regs_mask,
7245 reg_class_contents[TARGET_REGS])
7246 && ! hard_reg_set_intersect_p (*live_regs_mask,
7247 reg_class_contents[GENERAL_REGS]))
7249 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7250 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7256 /* Code to generate prologue and epilogue sequences */
7258 /* PUSHED is the number of bytes that are being pushed on the
7259 stack for register saves. Return the frame size, padded
7260 appropriately so that the stack stays properly aligned. */
7261 static HOST_WIDE_INT
7262 rounded_frame_size (int pushed)
7264 HOST_WIDE_INT size = get_frame_size ();
7265 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7267 if (ACCUMULATE_OUTGOING_ARGS)
7268 size += crtl->outgoing_args_size;
7270 return ((size + pushed + align - 1) & -align) - pushed;
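/* Worked example, assuming a 32-bit STACK_BOUNDARY (align == 4):
   get_frame_size () == 10 and PUSHED == 6 give
   ((10 + 6 + 3) & -4) - 6 == 16 - 6 == 10, so locals plus saves occupy
   16 bytes and the stack stays 4-byte aligned.  */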
7273 /* Choose a call-clobbered target-branch register that remains
7274 unchanged along the whole function. We set it up as the return
7275 value in the prologue. */
7277 sh_media_register_for_return (void)
7282 if (! crtl->is_leaf)
7284 if (lookup_attribute ("interrupt_handler",
7285 DECL_ATTRIBUTES (current_function_decl)))
7287 if (sh_cfun_interrupt_handler_p ())
7290 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7292 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7293 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7299 /* The maximum registers we need to save are:
7300 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7301 - 32 floating point registers (for each pair, we save none,
7302 one single precision value, or a double precision value).
7303 - 8 target registers
7304 - add 1 entry for a delimiter. */
7305 #define MAX_SAVED_REGS (62+32+8)
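/* That is 102 register slots; the schedule below sizes its entry array
   as MAX_SAVED_REGS + 2 to leave room for the start and end delimiters.  */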
7307 typedef struct save_entry_s
7316 /* There will be a delimiter entry with VOIDmode both at the start and the
7317 end of a filled in schedule. The end delimiter has the offset of the
7318 save with the smallest (i.e. most negative) offset. */
7319 typedef struct save_schedule_s
7321 save_entry entries[MAX_SAVED_REGS + 2];
7322 int temps[MAX_TEMPS+1];
7325 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7326 use reverse order. Returns the last entry written to (not counting
7327 the delimiter). OFFSET_BASE is a number to be added to all offset entries. */
7330 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7334 save_entry *entry = schedule->entries;
7338 if (! current_function_interrupt)
7339 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7340 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7341 && ! FUNCTION_ARG_REGNO_P (i)
7342 && i != FIRST_RET_REG
7343 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7344 && ! (crtl->calls_eh_return
7345 && (i == EH_RETURN_STACKADJ_REGNO
7346 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7347 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7348 schedule->temps[tmpx++] = i;
7350 entry->mode = VOIDmode;
7351 entry->offset = offset_base;
7353 /* We loop twice: first, we save 8-byte aligned registers in the
7354 higher addresses, which are known to be aligned. Then, we
7355 proceed to saving 32-bit registers that don't need 8-byte alignment.
7357 If this is an interrupt function, all registers that need saving
7358 need to be saved in full. Moreover, we need to postpone saving
7359 target registers till we have saved some general purpose registers
7360 we can then use as scratch registers. */
7361 offset = offset_base;
7362 for (align = 1; align >= 0; align--)
7364 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7365 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7367 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7370 if (current_function_interrupt)
7372 if (TARGET_REGISTER_P (i))
7374 if (GENERAL_REGISTER_P (i))
7377 if (mode == SFmode && (i % 2) == 1
7378 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7379 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7386 /* If we're doing the aligned pass and this is not aligned,
7387 or we're doing the unaligned pass and this is aligned, skip it. */
7389 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7393 if (current_function_interrupt
7394 && GENERAL_REGISTER_P (i)
7395 && tmpx < MAX_TEMPS)
7396 schedule->temps[tmpx++] = i;
7398 offset -= GET_MODE_SIZE (mode);
7401 entry->offset = offset;
7404 if (align && current_function_interrupt)
7405 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7406 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7408 offset -= GET_MODE_SIZE (DImode);
7410 entry->mode = DImode;
7411 entry->offset = offset;
7416 entry->mode = VOIDmode;
7417 entry->offset = offset;
7418 schedule->temps[tmpx] = -1;
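/* Illustrative layout for a hypothetical mask holding just two 4-byte
   registers A and B, with OFFSET_BASE == 0 and an 8-byte STACK_BOUNDARY
   (so both land in the unaligned pass):
     entries[0] = { VOIDmode, offset  0 }   start delimiter
     entries[1] = { A, SImode, offset -4 }
     entries[2] = { B, SImode, offset -8 }
     entries[3] = { VOIDmode, offset -8 }   end delimiter
   i.e. the end delimiter repeats the most negative offset, as described
   above.  */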
7422 /* Expand code for the function prologue. */
7424 sh_expand_prologue (void)
7426 HARD_REG_SET live_regs_mask;
7429 int save_flags = target_flags;
7433 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7435 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7437 /* We have pretend args if we had an object sent partially in registers
7438 and partially on the stack, e.g. a large structure. */
7439 pretend_args = crtl->args.pretend_args_size;
7440 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7441 && (NPARM_REGS(SImode)
7442 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7445 output_stack_adjust (-pretend_args
7446 - crtl->args.info.stack_regs * 8,
7447 stack_pointer_rtx, 0, NULL, true);
7448 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7450 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7451 /* We're going to use the PIC register to load the address of the
7452 incoming-argument decoder and/or of the return trampoline from
7453 the GOT, so make sure the PIC register is preserved and initialized. */
7455 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7457 if (TARGET_SHCOMPACT
7458 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7462 /* First, make all registers with incoming arguments that will
7463 be pushed onto the stack live, so that register renaming
7464 doesn't overwrite them. */
7465 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7466 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7467 >= NPARM_REGS (SImode) - reg)
7468 for (; reg < NPARM_REGS (SImode); reg++)
7469 emit_insn (gen_shcompact_preserve_incoming_args
7470 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7471 else if (CALL_COOKIE_INT_REG_GET
7472 (crtl->args.info.call_cookie, reg) == 1)
7473 emit_insn (gen_shcompact_preserve_incoming_args
7474 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7476 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7478 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7479 GEN_INT (crtl->args.info.call_cookie));
7480 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7481 gen_rtx_REG (SImode, R0_REG));
7483 else if (TARGET_SHMEDIA)
7485 int tr = sh_media_register_for_return ();
7488 emit_move_insn (gen_rtx_REG (DImode, tr),
7489 gen_rtx_REG (DImode, PR_MEDIA_REG));
7492 /* Emit the code for SETUP_VARARGS. */
7495 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7497 /* Push arg regs as if they'd been provided by the caller on the stack. */
7498 for (i = 0; i < NPARM_REGS(SImode); i++)
7500 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7502 if (i >= (NPARM_REGS(SImode)
7503 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7507 stack_usage += GET_MODE_SIZE (SImode);
7512 /* If we're supposed to switch stacks at function entry, do so now. */
7516 /* The argument specifies a variable holding the address of the
7517 stack the interrupt function should switch to/from at entry/exit. */
7518 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7520 = ggc_strdup (TREE_STRING_POINTER (arg));
7521 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7523 lab = add_constant (sp_switch, SImode, 0);
7524 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7526 emit_insn (gen_sp_switch_1 (newsrc));
7529 d = calc_live_regs (&live_regs_mask);
7530 /* ??? Maybe we could save some switching if we can move a mode switch
7531 that already happens to be at the function start into the prologue. */
7532 if (target_flags != save_flags && ! current_function_interrupt)
7533 emit_insn (gen_toggle_sz ());
7537 int offset_base, offset;
7539 int offset_in_r0 = -1;
7541 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7542 int total_size, save_size;
7543 save_schedule schedule;
7547 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7548 && ! current_function_interrupt)
7549 r0 = gen_rtx_REG (Pmode, R0_REG);
7551 /* D is the actual number of bytes that we need for saving registers;
7552 however, in initial_elimination_offset we have committed to using
7553 an additional TREGS_SPACE amount of bytes - in order to keep both
7554 addresses to arguments supplied by the caller and local variables
7555 valid, we must keep this gap. Place it between the incoming
7556 arguments and the actually saved registers in a bid to optimize
7557 locality of reference. */
7558 total_size = d + tregs_space;
7559 total_size += rounded_frame_size (total_size);
7560 save_size = total_size - rounded_frame_size (d);
7561 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7562 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7563 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7565 /* If adjusting the stack in a single step costs nothing extra, do so.
7566 I.e. either if a single addi is enough, or we need a movi anyway,
7567 and we don't exceed the maximum offset range (the test for the
7568 latter is conservative for simplicity). */
7570 && (CONST_OK_FOR_I10 (-total_size)
7571 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7572 && total_size <= 2044)))
7573 d_rounding = total_size - save_size;
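	  /* E.g. with total_size == 480, CONST_OK_FOR_I10 (-480) holds
	     (the I10 range is -512..511), so the whole 480-byte
	     adjustment is folded into the register-save step by setting
	     d_rounding = total_size - save_size.  */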
7575 offset_base = d + d_rounding;
7577 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7579 stack_usage += save_size + d_rounding;
7581 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7582 tmp_pnt = schedule.temps;
7583 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7585 enum machine_mode mode = (enum machine_mode) entry->mode;
7586 unsigned int reg = entry->reg;
7587 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7590 offset = entry->offset;
7592 reg_rtx = gen_rtx_REG (mode, reg);
7594 mem_rtx = gen_frame_mem (mode,
7595 gen_rtx_PLUS (Pmode,
7599 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7605 if (HAVE_PRE_DECREMENT
7606 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7607 || mem_rtx == NULL_RTX
7608 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7610 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7612 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7617 offset += GET_MODE_SIZE (mode);
7621 if (mem_rtx != NULL_RTX)
7624 if (offset_in_r0 == -1)
7626 emit_move_insn (r0, GEN_INT (offset));
7627 offset_in_r0 = offset;
7629 else if (offset != offset_in_r0)
7634 GEN_INT (offset - offset_in_r0)));
7635 offset_in_r0 += offset - offset_in_r0;
7638 if (pre_dec != NULL_RTX)
7644 (Pmode, r0, stack_pointer_rtx));
7648 offset -= GET_MODE_SIZE (mode);
7649 offset_in_r0 -= GET_MODE_SIZE (mode);
7654 mem_rtx = gen_frame_mem (mode, r0);
7656 mem_rtx = gen_frame_mem (mode,
7657 gen_rtx_PLUS (Pmode,
7661 /* We must not use an r0-based address for target-branch
7662 registers or for special registers without pre-dec
7663 memory addresses, since we store their values in r0 first. */
7665 gcc_assert (!TARGET_REGISTER_P (reg)
7666 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7667 || mem_rtx == pre_dec));
7670 orig_reg_rtx = reg_rtx;
7671 if (TARGET_REGISTER_P (reg)
7672 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7673 && mem_rtx != pre_dec))
7675 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7677 emit_move_insn (tmp_reg, reg_rtx);
7679 if (REGNO (tmp_reg) == R0_REG)
7683 gcc_assert (!refers_to_regno_p
7684 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7687 if (*++tmp_pnt <= 0)
7688 tmp_pnt = schedule.temps;
7695 /* Mark as interesting for the DWARF CFI generator. */
7696 insn = emit_move_insn (mem_rtx, reg_rtx);
7697 RTX_FRAME_RELATED_P (insn) = 1;
7698 /* If we use an intermediate register for the save, we can't
7699 describe this exactly in cfi as a copy of the to-be-saved
7700 register into the temporary register and then the temporary
7701 register on the stack, because the temporary register can
7702 have a different natural size than the to-be-saved register.
7703 Thus, we gloss over the intermediate copy and pretend we do
7704 a direct save from the to-be-saved register. */
7705 if (REGNO (reg_rtx) != reg)
7709 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7710 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7713 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7715 rtx reg_rtx = gen_rtx_REG (mode, reg);
7717 rtx mem_rtx = gen_frame_mem (mode,
7718 gen_rtx_PLUS (Pmode,
7722 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7723 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7728 gcc_assert (entry->offset == d_rounding);
7732 push_regs (&live_regs_mask, current_function_interrupt);
7736 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7737 emit_insn (gen_GOTaddr2picreg ());
7739 if (SHMEDIA_REGS_STACK_ADJUST ())
7741 /* This must NOT go through the PLT, otherwise mach and macl
7742 may be clobbered. */
7743 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7745 ? "__GCC_push_shmedia_regs"
7746 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7747 emit_insn (gen_shmedia_save_restore_regs_compact
7748 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7751 if (target_flags != save_flags && ! current_function_interrupt)
7752 emit_insn (gen_toggle_sz ());
7754 target_flags = save_flags;
7756 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7757 stack_pointer_rtx, 0, NULL, true);
7758 stack_usage += rounded_frame_size (d) - d_rounding;
7760 if (frame_pointer_needed)
7761 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7763 if (TARGET_SHCOMPACT
7764 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7766 /* This must NOT go through the PLT, otherwise mach and macl
7767 may be clobbered. */
7768 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7769 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7770 emit_insn (gen_shcompact_incoming_args ());
7773 /* If we are profiling, make sure no instructions are scheduled before
7774 the call to mcount. Similarly, if some call instructions are swapped
7775 before frame-related insns, it'll confuse the unwinder because
7776 currently SH has no unwind info for function epilogues. */
7777 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7778 emit_insn (gen_blockage ());
7780 if (flag_stack_usage_info)
7781 current_function_static_stack_size = stack_usage;
7784 /* Expand code for the function epilogue. */
7786 sh_expand_epilogue (bool sibcall_p)
7788 HARD_REG_SET live_regs_mask;
7792 int save_flags = target_flags;
7793 int frame_size, save_size;
7794 int fpscr_deferred = 0;
7795 int e = sibcall_p ? -1 : 1;
7797 d = calc_live_regs (&live_regs_mask);
7800 frame_size = rounded_frame_size (d);
7804 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7806 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7807 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7808 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7810 total_size = d + tregs_space;
7811 total_size += rounded_frame_size (total_size);
7812 save_size = total_size - frame_size;
7814 /* If adjusting the stack in a single step costs nothing extra, do so.
7815 I.e. either if a single addi is enough, or we need a movi anyway,
7816 and we don't exceed the maximum offset range (the test for the
7817 latter is conservative for simplicity). */
7819 && ! frame_pointer_needed
7820 && (CONST_OK_FOR_I10 (total_size)
7821 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7822 && total_size <= 2044)))
7823 d_rounding = frame_size;
7825 frame_size -= d_rounding;
7828 if (frame_pointer_needed)
7830 /* We must avoid scheduling the epilogue with previous basic blocks.
7831 See PR/18032 and PR/40313. */
7832 emit_insn (gen_blockage ());
7833 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7834 &live_regs_mask, true);
7836 /* We must avoid moving the stack pointer adjustment past code
7837 which reads from the local frame, else an interrupt could
7838 occur after the SP adjustment and clobber data in the local
7840 emit_insn (gen_blockage ());
7841 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7843 else if (frame_size)
7845 /* We must avoid moving the stack pointer adjustment past code
7846 which reads from the local frame, else an interrupt could
7847 occur after the SP adjustment and clobber data in the local
7849 emit_insn (gen_blockage ());
7850 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7851 &live_regs_mask, true);
7854 if (SHMEDIA_REGS_STACK_ADJUST ())
7856 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7858 ? "__GCC_pop_shmedia_regs"
7859 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7860 /* This must NOT go through the PLT, otherwise mach and macl
7861 may be clobbered. */
7862 emit_insn (gen_shmedia_save_restore_regs_compact
7863 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7866 /* Pop all the registers. */
7868 if (target_flags != save_flags && ! current_function_interrupt)
7869 emit_insn (gen_toggle_sz ());
7872 int offset_base, offset;
7873 int offset_in_r0 = -1;
7875 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7876 save_schedule schedule;
7880 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7881 offset_base = -entry[1].offset + d_rounding;
7882 tmp_pnt = schedule.temps;
7883 for (; entry->mode != VOIDmode; entry--)
7885 enum machine_mode mode = (enum machine_mode) entry->mode;
7886 int reg = entry->reg;
7887 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7889 offset = offset_base + entry->offset;
7890 reg_rtx = gen_rtx_REG (mode, reg);
7892 mem_rtx = gen_frame_mem (mode,
7893 gen_rtx_PLUS (Pmode,
7897 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7900 if (HAVE_POST_INCREMENT
7901 && (offset == offset_in_r0
7902 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7903 && mem_rtx == NULL_RTX)
7904 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7906 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7908 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7909 post_inc = NULL_RTX;
7914 if (mem_rtx != NULL_RTX)
7917 if (offset_in_r0 == -1)
7919 emit_move_insn (r0, GEN_INT (offset));
7920 offset_in_r0 = offset;
7922 else if (offset != offset_in_r0)
7927 GEN_INT (offset - offset_in_r0)));
7928 offset_in_r0 += offset - offset_in_r0;
7931 if (post_inc != NULL_RTX)
7937 (Pmode, r0, stack_pointer_rtx));
7943 offset_in_r0 += GET_MODE_SIZE (mode);
7946 mem_rtx = gen_frame_mem (mode, r0);
7948 mem_rtx = gen_frame_mem (mode,
7949 gen_rtx_PLUS (Pmode,
7953 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7954 || mem_rtx == post_inc);
7957 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7958 && mem_rtx != post_inc)
7960 emit_move_insn (r0, mem_rtx);
7963 else if (TARGET_REGISTER_P (reg))
7965 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7967 /* Give the scheduler a bit of freedom by using up to
7968 MAX_TEMPS registers in a round-robin fashion. */
7969 emit_move_insn (tmp_reg, mem_rtx);
7972 tmp_pnt = schedule.temps;
7975 emit_move_insn (reg_rtx, mem_rtx);
7978 gcc_assert (entry->offset + offset_base == d + d_rounding);
7980 else /* ! TARGET_SH5 */
7985 /* For an ISR with RESBANK attribute assigned, don't pop the PR register. */
7987 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7988 && !sh_cfun_resbank_handler_p ())
7990 if (!frame_pointer_needed)
7991 emit_insn (gen_blockage ());
7995 /* Banked registers are popped first to avoid being scheduled in the
7996 delay slot. RTE switches banks before the delay-slot instruction. */
7997 if (current_function_interrupt)
7999 bool use_movml = false;
8003 unsigned int count = 0;
8005 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8006 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8011 /* Use movml when all banked registers are popped. */
8012 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8016 if (sh_cfun_resbank_handler_p ())
8020 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8022 /* We must avoid scheduling multiple load insns with other insns. */
8024 emit_insn (gen_blockage ());
8025 emit_insn (gen_movml_pop_banked (sp_reg));
8026 emit_insn (gen_blockage ());
8029 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8030 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8033 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8036 last_reg = FIRST_PSEUDO_REGISTER;
8038 for (i = 0; i < last_reg; i++)
8040 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8042 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8043 && hard_reg_set_intersect_p (live_regs_mask,
8044 reg_class_contents[DF_REGS]))
8046 /* For an ISR with RESBANK attribute assigned, don't pop
8047 the following registers: R0-R14, MACH, MACL and GBR. */
8048 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8049 && ! (sh_cfun_resbank_handler_p ()
8050 && ((j >= FIRST_GENERAL_REG
8051 && j < LAST_GENERAL_REG)
8057 if (j == FIRST_FP_REG && fpscr_deferred)
8061 if (target_flags != save_flags && ! current_function_interrupt)
8062 emit_insn (gen_toggle_sz ());
8063 target_flags = save_flags;
8065 output_stack_adjust (crtl->args.pretend_args_size
8066 + save_size + d_rounding
8067 + crtl->args.info.stack_regs * 8,
8068 stack_pointer_rtx, e, NULL, true);
8070 if (crtl->calls_eh_return)
8071 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8072 EH_RETURN_STACKADJ_RTX));
8074 /* Switch back to the normal stack if necessary. */
8075 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8076 emit_insn (gen_sp_switch_2 ());
8078 /* Tell flow the insn that pops PR isn't dead. */
8079 /* PR_REG will never be live in SHmedia mode, and we don't need to
8080 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8081 by the return pattern. */
8082 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8083 emit_use (gen_rtx_REG (SImode, PR_REG));
8086 /* Emit code to change the current function's return address to RA.
8087 TEMP is available as a scratch register, if needed. */
8089 sh_set_return_address (rtx ra, rtx tmp)
8091 HARD_REG_SET live_regs_mask;
8093 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8096 d = calc_live_regs (&live_regs_mask);
8098 /* If pr_reg isn't live, we can set it (or the register given in
8099 sh_media_register_for_return) directly. */
8100 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8106 int rr_regno = sh_media_register_for_return ();
8111 rr = gen_rtx_REG (DImode, rr_regno);
8114 rr = gen_rtx_REG (SImode, pr_reg);
8116 emit_insn (GEN_MOV (rr, ra));
8117 /* Tell flow the register for return isn't dead. */
8125 save_schedule schedule;
8128 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8129 offset = entry[1].offset;
8130 for (; entry->mode != VOIDmode; entry--)
8131 if (entry->reg == pr_reg)
8134 /* We couldn't find the PR register. */
8138 offset = entry->offset - offset;
8139 pr_offset = (rounded_frame_size (d) + offset
8140 + SHMEDIA_REGS_STACK_ADJUST ());
8143 pr_offset = rounded_frame_size (d);
8145 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8147 if (frame_pointer_needed)
8148 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8150 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8152 tmp = gen_frame_mem (Pmode, tmp);
8153 emit_insn (GEN_MOV (tmp, ra));
8154 /* Tell flow this store isn't dead. */
8158 /* Clear variables at function end. */
8160 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8161 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8166 sh_builtin_saveregs (void)
8168 /* First unnamed integer register. */
8169 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8170 /* Number of integer registers we need to save. */
8171 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8172 /* First unnamed SFmode float reg. */
8173 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8174 /* Number of SFmode float regs to save. */
8175 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8178 alias_set_type alias_set;
8184 int pushregs = n_intregs;
8186 while (pushregs < NPARM_REGS (SImode) - 1
8187 && (CALL_COOKIE_INT_REG_GET
8188 (crtl->args.info.call_cookie,
8189 NPARM_REGS (SImode) - pushregs)
8192 crtl->args.info.call_cookie
8193 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8198 if (pushregs == NPARM_REGS (SImode))
8199 crtl->args.info.call_cookie
8200 |= (CALL_COOKIE_INT_REG (0, 1)
8201 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8203 crtl->args.info.call_cookie
8204 |= CALL_COOKIE_STACKSEQ (pushregs);
8206 crtl->args.pretend_args_size += 8 * n_intregs;
8208 if (TARGET_SHCOMPACT)
8212 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8214 error ("__builtin_saveregs not supported by this subtarget");
8221 /* Allocate block of memory for the regs. */
8222 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8223 Or can assign_stack_local accept a 0 SIZE argument? */
8224 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8227 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8228 else if (n_floatregs & 1)
8232 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8233 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8234 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8235 regbuf = change_address (regbuf, BLKmode, addr);
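      /* This forces bit 2 of the address on (e.g. 0x7f00 -> 0x7f04,
	 0x7f04 -> 0x7f04), i.e. it places the buffer at 4 mod 8 inside
	 the extra word allocated above; with an odd float-register count
	 the DFmode pair stores emitted below then fall on 8-byte
	 boundaries (my reading of the layout).  */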
8237 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8241 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8242 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8243 XEXP (regbuf, 0), 4));
8244 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8245 emit_insn (gen_andsi3 (addr, addr, mask));
8246 regbuf = change_address (regbuf, BLKmode, addr);
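      /* Worked example: buffers at 0x7f00 and 0x7f04 give
	 (0x7f00 + 4) & -8 == 0x7f00 and (0x7f04 + 4) & -8 == 0x7f08,
	 i.e. the first 8-byte aligned address within the extra
	 UNITS_PER_WORD of slack allocated above.  */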
8249 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8250 alias_set = get_varargs_alias_set ();
8251 set_mem_alias_set (regbuf, alias_set);
8254 /* Save int args. This is optimized to only save the regs that are necessary. Explicitly
8255 named args need not be saved. */
8257 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8258 adjust_address (regbuf, BLKmode,
8259 n_floatregs * UNITS_PER_WORD),
8263 /* Return the address of the regbuf. */
8264 return XEXP (regbuf, 0);
8267 /* Save float args. This is optimized to only save the regs that are necessary. Explicitly
8268 named args need not be saved.
8269 We explicitly build a pointer to the buffer because it halves the insn
8270 count when not optimizing (otherwise the pointer is built for each reg
8272 We emit the moves in reverse order so that we can use predecrement. */
8274 fpregs = copy_to_mode_reg (Pmode,
8275 plus_constant (Pmode, XEXP (regbuf, 0),
8276 n_floatregs * UNITS_PER_WORD));
8277 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8280 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8282 emit_insn (gen_addsi3 (fpregs, fpregs,
8283 GEN_INT (-2 * UNITS_PER_WORD)));
8284 mem = change_address (regbuf, DFmode, fpregs);
8285 emit_move_insn (mem,
8286 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8288 regno = first_floatreg;
8291 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8292 mem = change_address (regbuf, SFmode, fpregs);
8293 emit_move_insn (mem,
8294 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8295 + regno - SH_REG_MSW_OFFSET));
8299 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8303 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8304 mem = change_address (regbuf, SFmode, fpregs);
8305 emit_move_insn (mem,
8306 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8309 /* Return the address of the regbuf. */
8310 return XEXP (regbuf, 0);
8313 /* Define the `__builtin_va_list' type for the ABI. */
8315 sh_build_builtin_va_list (void)
8317 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8318 tree record, type_decl;
8320 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8321 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8322 return ptr_type_node;
8324 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8325 type_decl = build_decl (BUILTINS_LOCATION,
8326 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8328 f_next_o = build_decl (BUILTINS_LOCATION,
8329 FIELD_DECL, get_identifier ("__va_next_o"),
8331 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8333 get_identifier ("__va_next_o_limit"),
8335 f_next_fp = build_decl (BUILTINS_LOCATION,
8336 FIELD_DECL, get_identifier ("__va_next_fp"),
8338 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8340 get_identifier ("__va_next_fp_limit"),
8342 f_next_stack = build_decl (BUILTINS_LOCATION,
8343 FIELD_DECL, get_identifier ("__va_next_stack"),
8346 DECL_FIELD_CONTEXT (f_next_o) = record;
8347 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8348 DECL_FIELD_CONTEXT (f_next_fp) = record;
8349 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8350 DECL_FIELD_CONTEXT (f_next_stack) = record;
8352 TYPE_STUB_DECL (record) = type_decl;
8353 TYPE_NAME (record) = type_decl;
8354 TYPE_FIELDS (record) = f_next_o;
8355 DECL_CHAIN (f_next_o) = f_next_o_limit;
8356 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8357 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8358 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8360 layout_type (record);
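/* Rough C equivalent of the record built above (field types assumed to
   be plain pointers, matching how sh_va_start assigns them below):

     struct __va_list_tag
     {
       void *__va_next_o;	 // next integer arg in the reg-save area
       void *__va_next_o_limit;	 // end of the integer reg-save area
       void *__va_next_fp;	 // next FP arg in the reg-save area
       void *__va_next_fp_limit; // end of the FP reg-save area
       void *__va_next_stack;	 // next stack-passed arg
     };  */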
8365 /* Implement `va_start' for varargs and stdarg. */
8367 sh_va_start (tree valist, rtx nextarg)
8369 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8370 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8376 expand_builtin_saveregs ();
8377 std_expand_builtin_va_start (valist, nextarg);
8381 if ((! TARGET_SH2E && ! TARGET_SH4)
8382 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8384 std_expand_builtin_va_start (valist, nextarg);
8388 f_next_o = TYPE_FIELDS (va_list_type_node);
8389 f_next_o_limit = DECL_CHAIN (f_next_o);
8390 f_next_fp = DECL_CHAIN (f_next_o_limit);
8391 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8392 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8394 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8396 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8397 valist, f_next_o_limit, NULL_TREE);
8398 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8400 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8401 valist, f_next_fp_limit, NULL_TREE);
8402 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8403 valist, f_next_stack, NULL_TREE);
8405 /* Call __builtin_saveregs. */
8406 u = make_tree (sizetype, expand_builtin_saveregs ());
8407 u = fold_convert (ptr_type_node, u);
8408 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8409 TREE_SIDE_EFFECTS (t) = 1;
8410 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8412 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8417 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8418 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8419 TREE_SIDE_EFFECTS (t) = 1;
8420 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8422 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8423 TREE_SIDE_EFFECTS (t) = 1;
8424 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8426 nint = crtl->args.info.arg_count[SH_ARG_INT];
8431 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8432 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8433 TREE_SIDE_EFFECTS (t) = 1;
8434 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8436 u = make_tree (ptr_type_node, nextarg);
8437 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8438 TREE_SIDE_EFFECTS (t) = 1;
8439 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
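/* Worked example (illustrative), assuming the elided code converts the
   named-argument counts into counts of the remaining unnamed registers
   (8 SFmode and 4 SImode argument registers on this ABI): for
   f (int a, float b, ...), nfp becomes 8 - 1 == 7 and nint becomes
   4 - 1 == 3, so next_fp_limit = next_fp + 7*4 bytes, next_o starts at
   that limit, next_o_limit = next_o + 3*4 bytes, and next_stack points
   at the first caller-pushed argument (NEXTARG).  */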
8442 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8443 member, return it. */
8445 find_sole_member (tree type)
8447 tree field, member = NULL_TREE;
8449 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8451 if (TREE_CODE (field) != FIELD_DECL)
8453 if (!DECL_SIZE (field))
8455 if (integer_zerop (DECL_SIZE (field)))
8464 /* Implement `va_arg'. */
8466 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8467 gimple_seq *post_p ATTRIBUTE_UNUSED)
8469 HOST_WIDE_INT size, rsize;
8470 tree tmp, pptr_type_node;
8471 tree addr, lab_over = NULL, result = NULL;
8475 if (!VOID_TYPE_P (type))
8476 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8478 pass_by_ref = false;
8481 type = build_pointer_type (type);
8483 size = int_size_in_bytes (type);
8484 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8485 pptr_type_node = build_pointer_type (ptr_type_node);
8487 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8488 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8490 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8491 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8496 f_next_o = TYPE_FIELDS (va_list_type_node);
8497 f_next_o_limit = DECL_CHAIN (f_next_o);
8498 f_next_fp = DECL_CHAIN (f_next_o_limit);
8499 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8500 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8502 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8504 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8505 valist, f_next_o_limit, NULL_TREE);
8506 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8507 valist, f_next_fp, NULL_TREE);
8508 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8509 valist, f_next_fp_limit, NULL_TREE);
8510 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8511 valist, f_next_stack, NULL_TREE);
8513 /* Structures with a single member with a distinct mode are passed
8514 like their member. This is relevant if the latter has a REAL_TYPE
8515 or COMPLEX_TYPE type. */
8517 while (TREE_CODE (eff_type) == RECORD_TYPE
8518 && (member = find_sole_member (eff_type))
8519 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8520 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8521 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8523 tree field_type = TREE_TYPE (member);
8525 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8526 eff_type = field_type;
8529 gcc_assert ((TYPE_ALIGN (eff_type)
8530 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8531 || (TYPE_ALIGN (eff_type)
8532 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8537 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8539 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8540 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8541 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8546 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8549 addr = create_tmp_var (pptr_type_node, NULL);
8550 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8551 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8553 valist = build_simple_mem_ref (addr);
8557 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8559 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8561 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8562 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8564 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8565 tmp = next_fp_limit;
8566 if (size > 4 && !is_double)
8567 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8568 tmp = build2 (GE_EXPR, boolean_type_node,
8569 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8570 cmp = build3 (COND_EXPR, void_type_node, tmp,
8571 build1 (GOTO_EXPR, void_type_node,
8572 unshare_expr (lab_false)), NULL_TREE);
8574 gimplify_and_add (cmp, pre_p);
8576 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8577 || (is_double || size == 16))
8579 tmp = fold_convert (sizetype, next_fp_tmp);
8580 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8581 size_int (UNITS_PER_WORD));
8582 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8583 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8586 gimplify_and_add (cmp, pre_p);
8588 #ifdef FUNCTION_ARG_SCmode_WART
8589 if (TYPE_MODE (eff_type) == SCmode
8590 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8592 tree subtype = TREE_TYPE (eff_type);
8596 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8597 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8600 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8601 real = get_initialized_tmp_var (real, pre_p, NULL);
8603 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8604 if (type != eff_type)
8605 result = build1 (VIEW_CONVERT_EXPR, type, result);
8606 result = get_initialized_tmp_var (result, pre_p, NULL);
8608 #endif /* FUNCTION_ARG_SCmode_WART */
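/* Illustrative: on little-endian SH4 the two SFmode halves of an SCmode
   argument arrive swapped in the FP argument registers, so the code
   above fetches the imaginary part first and then rebuilds the complex
   value from the two halves.  */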
8610 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8611 gimplify_and_add (tmp, pre_p);
8613 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8614 gimplify_and_add (tmp, pre_p);
8616 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8617 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8618 gimplify_assign (unshare_expr (next_fp_tmp),
8619 unshare_expr (valist), pre_p);
8621 gimplify_assign (unshare_expr (valist),
8622 unshare_expr (next_fp_tmp), post_p);
8623 valist = next_fp_tmp;
8627 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8628 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8629 unshare_expr (next_o_limit));
8630 tmp = build3 (COND_EXPR, void_type_node, tmp,
8631 build1 (GOTO_EXPR, void_type_node,
8632 unshare_expr (lab_false)),
8634 gimplify_and_add (tmp, pre_p);
8636 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8637 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8639 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8640 gimplify_and_add (tmp, pre_p);
8642 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8643 gimplify_and_add (tmp, pre_p);
8645 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8646 gimplify_assign (unshare_expr (next_o),
8647 unshare_expr (next_o_limit), pre_p);
8649 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8650 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8655 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8656 gimplify_and_add (tmp, pre_p);
8660 /* ??? In va-sh.h, there had been code to make values larger than
8661 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8663 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8666 gimplify_assign (result, tmp, pre_p);
8667 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8668 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8669 gimplify_and_add (tmp, pre_p);
8675 result = build_va_arg_indirect_ref (result);
8680 /* 64-bit floating point memory transfers are paired single precision loads
8681 or stores, so the DWARF information needs fixing in little endian mode
8682 (unless PR=SZ=1 in FPSCR). */
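/* E.g. (illustrative): for a DFmode value in dr0 the function below
   returns (parallel [fr1 fr0]).  */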
8684 sh_dwarf_register_span (rtx reg)
8686 unsigned regno = REGNO (reg);
8688 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8692 gen_rtx_PARALLEL (VOIDmode,
8694 gen_rtx_REG (SFmode, regno + 1),
8695 gen_rtx_REG (SFmode, regno)));
8698 static enum machine_mode
8699 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8700 int *punsignedp, const_tree funtype,
8703 if (sh_promote_prototypes (funtype))
8704 return promote_mode (type, mode, punsignedp);
8706 return default_promote_function_mode (type, mode, punsignedp, funtype,
8711 sh_promote_prototypes (const_tree type)
8717 return ! sh_attr_renesas_p (type);
8720 /* Whether an argument must be passed by reference. On SHcompact, we
8721 pretend arguments wider than 32 bits that would have been passed in
8722 registers are passed by reference, so that an SHmedia trampoline
8723 loads them into the full 64-bit registers. */
8725 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8726 const_tree type, bool named)
8728 unsigned HOST_WIDE_INT size;
8731 size = int_size_in_bytes (type);
8733 size = GET_MODE_SIZE (mode);
8735 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8737 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8738 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8739 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8741 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8742 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8749 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8750 const_tree type, bool named)
8752 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8754 if (targetm.calls.must_pass_in_stack (mode, type))
8757 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8758 wants to know about pass-by-reference semantics for incoming
8763 if (TARGET_SHCOMPACT)
8765 cum->byref = shcompact_byref (cum, mode, type, named);
8766 return cum->byref != 0;
8773 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8774 const_tree type, bool named ATTRIBUTE_UNUSED)
8776 /* ??? How can it possibly be correct to return true only on the
8777 caller side of the equation? Is there someplace else in the
8778 sh backend that's magically producing the copies? */
8779 return (get_cumulative_args (cum)->outgoing
8780 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8781 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8785 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8786 tree type, bool named ATTRIBUTE_UNUSED)
8788 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8792 && PASS_IN_REG_P (*cum, mode, type)
8793 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8794 && (ROUND_REG (*cum, mode)
8796 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8797 : ROUND_ADVANCE (int_size_in_bytes (type)))
8798 > NPARM_REGS (mode)))
8799 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8801 else if (!TARGET_SHCOMPACT
8802 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8803 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8805 return words * UNITS_PER_WORD;
8809 /* Define where to put the arguments to a function.
8810 Value is zero to push the argument on the stack,
8811 or a hard register in which to store the argument.
8813 MODE is the argument's machine mode.
8814 TYPE is the data type of the argument (as a tree).
8815 This is null for libcalls where that information may
8816 not be available.
8817 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8818 the preceding args and about the function being called.
8819 NAMED is nonzero if this argument is a named parameter
8820 (otherwise it is an extra parameter matching an ellipsis).
8822 On SH the first args are normally in registers
8823 and the rest are pushed. Any arg that starts within the first
8824 NPARM_REGS words is at least partially passed in a register unless
8825 its data type forbids. */
8827 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8828 const_tree type, bool named)
8830 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8832 if (! TARGET_SH5 && mode == VOIDmode)
8833 return GEN_INT (ca->renesas_abi ? 1 : 0);
8836 && PASS_IN_REG_P (*ca, mode, type)
8837 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8841 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8842 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8844 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8845 gen_rtx_REG (SFmode,
8847 + (ROUND_REG (*ca, mode) ^ 1)),
8849 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8850 gen_rtx_REG (SFmode,
8852 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8854 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8857 /* If the alignment of a DF value causes an SF register to be
8858 skipped, we will use that skipped register for the next SF
8860 if ((TARGET_HITACHI || ca->renesas_abi)
8861 && ca->free_single_fp_reg
8863 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8865 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8866 ^ (mode == SFmode && TARGET_SH4
8867 && TARGET_LITTLE_ENDIAN
8868 && ! TARGET_HITACHI && ! ca->renesas_abi);
8869 return gen_rtx_REG (mode, regno);
8875 if (mode == VOIDmode && TARGET_SHCOMPACT)
8876 return GEN_INT (ca->call_cookie);
8878 /* The following test assumes unnamed arguments are promoted to
8879 DFmode. */
8880 if (mode == SFmode && ca->free_single_fp_reg)
8881 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8883 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8884 && (named || ! ca->prototype_p)
8885 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8887 if (! ca->prototype_p && TARGET_SHMEDIA)
8888 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8890 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8892 + ca->arg_count[(int) SH_ARG_FLOAT]);
8895 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8896 && (! TARGET_SHCOMPACT
8897 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8898 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8901 return gen_rtx_REG (mode, (FIRST_PARM_REG
8902 + ca->arg_count[(int) SH_ARG_INT]));
8911 /* Update the data in CUM to advance over an argument
8912 of mode MODE and data type TYPE.
8913 (TYPE is null for libcalls where that information may not be
8914 available.) */
8915 static void
8916 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
8917 const_tree type, bool named)
8919 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8923 else if (TARGET_SH5)
8925 const_tree type2 = (ca->byref && type
8928 enum machine_mode mode2 = (ca->byref && type
8931 int dwords = ((ca->byref
8934 ? int_size_in_bytes (type2)
8935 : GET_MODE_SIZE (mode2)) + 7) / 8;
8936 int numregs = MIN (dwords, NPARM_REGS (SImode)
8937 - ca->arg_count[(int) SH_ARG_INT]);
8941 ca->arg_count[(int) SH_ARG_INT] += numregs;
8942 if (TARGET_SHCOMPACT
8943 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8946 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8948 /* N.B. We want this also for outgoing. */
8949 ca->stack_regs += numregs;
8954 ca->stack_regs += numregs;
8955 ca->byref_regs += numregs;
8959 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8963 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8966 else if (dwords > numregs)
8968 int pushregs = numregs;
8970 if (TARGET_SHCOMPACT)
8971 ca->stack_regs += numregs;
8972 while (pushregs < NPARM_REGS (SImode) - 1
8973 && (CALL_COOKIE_INT_REG_GET
8975 NPARM_REGS (SImode) - pushregs)
8979 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8983 if (numregs == NPARM_REGS (SImode))
8985 |= CALL_COOKIE_INT_REG (0, 1)
8986 | CALL_COOKIE_STACKSEQ (numregs - 1);
8989 |= CALL_COOKIE_STACKSEQ (numregs);
8992 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8993 && (named || ! ca->prototype_p))
8995 if (mode2 == SFmode && ca->free_single_fp_reg)
8996 ca->free_single_fp_reg = 0;
8997 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8998 < NPARM_REGS (SFmode))
9001 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9003 - ca->arg_count[(int) SH_ARG_FLOAT]);
9005 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9007 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9009 if (ca->outgoing && numregs > 0)
9013 |= (CALL_COOKIE_INT_REG
9014 (ca->arg_count[(int) SH_ARG_INT]
9015 - numregs + ((numfpregs - 2) / 2),
9016 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9019 while (numfpregs -= 2);
9021 else if (mode2 == SFmode && (named)
9022 && (ca->arg_count[(int) SH_ARG_FLOAT]
9023 < NPARM_REGS (SFmode)))
9024 ca->free_single_fp_reg
9025 = FIRST_FP_PARM_REG - numfpregs
9026 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9032 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9034 /* Note that we've used the skipped register. */
9035 if (mode == SFmode && ca->free_single_fp_reg)
9037 ca->free_single_fp_reg = 0;
9040 /* When we have a DF after an SF, there's an SF register that gets
9041 skipped in order to align the DF value. We note this skipped
9042 register, because the next SF value will use it, and not the
9043 SF that follows the DF. */
9045 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
9047 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
9048 + BASE_ARG_REG (mode));
9052 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9053 || PASS_IN_REG_P (*ca, mode, type))
9054 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9055 = (ROUND_REG (*ca, mode)
9057 ? ROUND_ADVANCE (int_size_in_bytes (type))
9058 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
9061 /* The Renesas calling convention doesn't quite fit into this scheme since
9062 the address is passed like an invisible argument, but one that is always
9063 passed in memory. */
9065 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9067 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9069 return gen_rtx_REG (Pmode, 2);
9072 /* Worker function for TARGET_FUNCTION_VALUE.
9074 For the SH, this is like LIBCALL_VALUE, except that we must change the
9075 mode like PROMOTE_MODE does.
9076 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9077 tested here has to be kept in sync with the one in
9078 explow.c:promote_mode. */
9080 sh_function_value (const_tree valtype,
9081 const_tree fn_decl_or_type,
9082 bool outgoing ATTRIBUTE_UNUSED)
9085 && !DECL_P (fn_decl_or_type))
9086 fn_decl_or_type = NULL;
9088 return gen_rtx_REG (
9089 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9090 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9091 && (TREE_CODE (valtype) == INTEGER_TYPE
9092 || TREE_CODE (valtype) == ENUMERAL_TYPE
9093 || TREE_CODE (valtype) == BOOLEAN_TYPE
9094 || TREE_CODE (valtype) == REAL_TYPE
9095 || TREE_CODE (valtype) == OFFSET_TYPE))
9096 && sh_promote_prototypes (fn_decl_or_type)
9097 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9098 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9101 /* Worker function for TARGET_LIBCALL_VALUE. */
9103 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9105 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9108 /* Return true if N is a possible register number of function value. */
9110 sh_function_value_regno_p (const unsigned int regno)
9112 return ((regno) == FIRST_RET_REG
9113 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9114 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9117 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9119 sh_return_in_memory (const_tree type, const_tree fndecl)
9123 if (TYPE_MODE (type) == BLKmode)
9124 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9126 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9130 return (TYPE_MODE (type) == BLKmode
9131 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9132 && TREE_CODE (type) == RECORD_TYPE));
9136 /* We actually emit the code in sh_expand_prologue. We used to use
9137 a static variable to flag that we need to emit this code, but that
9138 doesn't work when inlining, when functions are deferred and then emitted
9139 later. Fortunately, we already have two flags that are part of struct
9140 function that tell if a function uses varargs or stdarg. */
9142 sh_setup_incoming_varargs (cumulative_args_t ca,
9143 enum machine_mode mode,
9145 int *pretend_arg_size,
9146 int second_time ATTRIBUTE_UNUSED)
9148 gcc_assert (cfun->stdarg);
9149 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9151 int named_parm_regs, anon_parm_regs;
9153 named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
9155 ? ROUND_ADVANCE (int_size_in_bytes (type))
9156 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
9157 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9158 if (anon_parm_regs > 0)
9159 *pretend_arg_size = anon_parm_regs * 4;
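/* E.g. (illustrative): for  int f (int a, ...)  the single named
   argument occupies r4, leaving three anonymous parameter registers
   (r5..r7), so *pretend_arg_size becomes 3 * 4 = 12 bytes.  */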
9164 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9170 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9172 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9174 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9178 /* Define the offset between two registers, one to be eliminated, and
9179 the other its replacement, at the start of a routine. */
9181 initial_elimination_offset (int from, int to)
9184 int regs_saved_rounding = 0;
9185 int total_saved_regs_space;
9186 int total_auto_space;
9187 int save_flags = target_flags;
9189 HARD_REG_SET live_regs_mask;
9191 shmedia_space_reserved_for_target_registers = false;
9192 regs_saved = calc_live_regs (&live_regs_mask);
9193 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9195 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9197 shmedia_space_reserved_for_target_registers = true;
9198 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9201 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9202 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9203 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9205 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9206 copy_flags = target_flags;
9207 target_flags = save_flags;
9209 total_saved_regs_space = regs_saved + regs_saved_rounding;
9211 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9212 return total_saved_regs_space + total_auto_space
9213 + crtl->args.info.byref_regs * 8;
9215 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9216 return total_saved_regs_space + total_auto_space
9217 + crtl->args.info.byref_regs * 8;
9219 /* Initial gap between fp and sp is 0. */
9220 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9223 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9224 return rounded_frame_size (0);
9226 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9227 return rounded_frame_size (0);
9229 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9230 && (to == HARD_FRAME_POINTER_REGNUM
9231 || to == STACK_POINTER_REGNUM));
9234 int n = total_saved_regs_space;
9235 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9236 save_schedule schedule;
9239 n += total_auto_space;
9241 /* If it wasn't saved, there's not much we can do. */
9242 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9245 target_flags = copy_flags;
9247 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9248 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9249 if (entry->reg == pr_reg)
9251 target_flags = save_flags;
9252 return entry->offset;
9257 return total_auto_space;
9260 /* Parse the -mfixed-range= option string. */
9262 sh_fix_range (const char *const_str)
9265 char *str, *dash, *comma;
9267 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9268 REG2 are either register names or register numbers. The effect
9269 of this option is to mark the registers in the range from REG1 to
9270 REG2 as ``fixed'' so they won't be used by the compiler. */
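/* Example (illustrative): -mfixed-range=r10-r13 marks r10..r13 as
   fixed and call-used, so the compiler will not allocate them;
   several such ranges may be given, separated by commas.  */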
9272 i = strlen (const_str);
9273 str = (char *) alloca (i + 1);
9274 memcpy (str, const_str, i + 1);
9278 dash = strchr (str, '-');
9281 warning (0, "value of -mfixed-range must have form REG1-REG2");
9285 comma = strchr (dash + 1, ',');
9289 first = decode_reg_name (str);
9292 warning (0, "unknown register name: %s", str);
9296 last = decode_reg_name (dash + 1);
9299 warning (0, "unknown register name: %s", dash + 1);
9307 warning (0, "%s-%s is an empty range", str, dash + 1);
9311 for (i = first; i <= last; ++i)
9312 fixed_regs[i] = call_used_regs[i] = 1;
9322 /* Insert any deferred function attributes from earlier pragmas. */
9324 sh_insert_attributes (tree node, tree *attributes)
9328 if (TREE_CODE (node) != FUNCTION_DECL)
9331 /* We are only interested in fields. */
9335 /* Append the attributes to the deferred attributes. */
9336 *sh_deferred_function_attributes_tail = *attributes;
9337 attrs = sh_deferred_function_attributes;
9341 /* Some attributes imply or require the interrupt attribute. */
9342 if (!lookup_attribute ("interrupt_handler", attrs)
9343 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9345 /* If we have a trapa_handler, but no interrupt_handler attribute,
9346 insert an interrupt_handler attribute. */
9347 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9348 /* We can't use sh_pr_interrupt here because that's not in the
9349 java frontend. */
9350 attrs
9351 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9352 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9353 if the interrupt attribute is missing, we ignore the attribute
9354 and warn. */
9355 else if (lookup_attribute ("sp_switch", attrs)
9356 || lookup_attribute ("trap_exit", attrs)
9357 || lookup_attribute ("nosave_low_regs", attrs)
9358 || lookup_attribute ("resbank", attrs))
9362 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9364 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9365 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9366 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9367 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9368 warning (OPT_Wattributes,
9369 "%qE attribute only applies to interrupt functions",
9370 TREE_PURPOSE (attrs));
9373 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9375 tail = &TREE_CHAIN (*tail);
9378 attrs = *attributes;
9382 /* Install the processed list. */
9383 *attributes = attrs;
9385 /* Clear deferred attributes. */
9386 sh_deferred_function_attributes = NULL_TREE;
9387 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9392 /*------------------------------------------------------------------------------
9393 Target specific attributes
9394 Supported attributes are:
9397 Specifies this function is an interrupt handler.
9400 Like interrupt_handler, but don't save all registers.
9403 Specifies an alternate stack for an interrupt handler to run on.
9406 Use a trapa to exit an interrupt function instead of rte.
9409 Don't save r0..r7 in an interrupt handler function.
9410 This is useful on SH3* and SH4*, which have a separate set of low
9411 regs for user and privileged modes.
9412 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9413 those that run with interrupts disabled and thus can't be
9414 interrupted themselves).
9417 Use Renesas calling/layout conventions (functions and structures).
9420 In case of an interrupt handler function, use a register bank to
9421 save registers R0-R14, MACH, MACL, GBR and PR.
9422 This is available only on SH2A targets.
9425 Declares a function to be called using the TBR relative addressing
9426 mode. Takes an argument that specifies the slot number in the table
9427 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
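/* Usage sketch for the attributes above (illustrative only, not
   compiled here):

     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
			  trap_exit (4)))
     handler (void);

     void __attribute__ ((function_vector (5)))
     tbr_func (void);

   where "alt_stack" names the alternate stack and 5 is the TBR table
   slot; the handlers below check these argument forms.  */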
9430 /* Handle a 'resbank' attribute. */
9432 sh_handle_resbank_handler_attribute (tree * node, tree name,
9433 tree args ATTRIBUTE_UNUSED,
9434 int flags ATTRIBUTE_UNUSED,
9435 bool * no_add_attrs)
9439 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9441 *no_add_attrs = true;
9443 if (TREE_CODE (*node) != FUNCTION_DECL)
9445 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9447 *no_add_attrs = true;
9453 /* Handle an "interrupt_handler" attribute; arguments as in
9454 struct attribute_spec.handler. */
9456 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9457 tree args ATTRIBUTE_UNUSED,
9458 int flags ATTRIBUTE_UNUSED,
9461 if (TREE_CODE (*node) != FUNCTION_DECL)
9463 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9465 *no_add_attrs = true;
9467 else if (TARGET_SHCOMPACT)
9469 error ("attribute interrupt_handler is not compatible with -m5-compact");
9470 *no_add_attrs = true;
9476 /* Handle a 'function_vector' attribute; arguments as in
9477 struct attribute_spec.handler. */
9479 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9480 tree args ATTRIBUTE_UNUSED,
9481 int flags ATTRIBUTE_UNUSED,
9482 bool * no_add_attrs)
9486 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9488 *no_add_attrs = true;
9490 else if (TREE_CODE (*node) != FUNCTION_DECL)
9492 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9494 *no_add_attrs = true;
9496 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9498 /* The argument must be a constant integer. */
9499 warning (OPT_Wattributes,
9500 "%qE attribute argument not an integer constant",
9502 *no_add_attrs = true;
9504 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9506 /* The argument value must be between 0 and 255. */
9507 warning (OPT_Wattributes,
9508 "%qE attribute argument should be between 0 and 255",
9510 *no_add_attrs = true;
9515 /* Returns true if the given rtx X refers to a function that has been
9516 assigned the attribute 'function_vector'. */
9518 sh2a_is_function_vector_call (rtx x)
9520 if (GET_CODE (x) == SYMBOL_REF
9521 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9523 tree tr = SYMBOL_REF_DECL (x);
9525 if (sh2a_function_vector_p (tr))
9532 /* Returns the function vector number, if the attribute
9533 'function_vector' is assigned, otherwise returns zero. */
9535 sh2a_get_function_vector_number (rtx x)
9540 if ((GET_CODE (x) == SYMBOL_REF)
9541 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9543 t = SYMBOL_REF_DECL (x);
9545 if (TREE_CODE (t) != FUNCTION_DECL)
9548 list = SH_ATTRIBUTES (t);
9551 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9553 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9557 list = TREE_CHAIN (list);
9566 /* Handle an "sp_switch" attribute; arguments as in
9567 struct attribute_spec.handler. */
9569 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9570 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9572 if (TREE_CODE (*node) != FUNCTION_DECL)
9574 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9576 *no_add_attrs = true;
9578 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9580 /* The argument must be a constant string. */
9581 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9583 *no_add_attrs = true;
9589 /* Handle a "trap_exit" attribute; arguments as in
9590 struct attribute_spec.handler. */
9592 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9593 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9595 if (TREE_CODE (*node) != FUNCTION_DECL)
9597 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9599 *no_add_attrs = true;
9601 /* The argument specifies a trap number to be used in a trapa instruction
9602 at function exit (instead of an rte instruction). */
9603 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9605 /* The argument must be a constant integer. */
9606 warning (OPT_Wattributes, "%qE attribute argument not an "
9607 "integer constant", name);
9608 *no_add_attrs = true;
9615 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9616 tree name ATTRIBUTE_UNUSED,
9617 tree args ATTRIBUTE_UNUSED,
9618 int flags ATTRIBUTE_UNUSED,
9619 bool *no_add_attrs ATTRIBUTE_UNUSED)
9624 /* True if __attribute__((renesas)) or -mrenesas. */
9626 sh_attr_renesas_p (const_tree td)
9630 if (td == NULL_TREE)
9633 td = TREE_TYPE (td);
9634 if (td == error_mark_node)
9636 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9640 /* True if __attribute__((renesas)) or -mrenesas, for the current
9641 function. */
9642 bool
9643 sh_cfun_attr_renesas_p (void)
9645 return sh_attr_renesas_p (current_function_decl);
9648 /* Returns true if the current function has the "interrupt_handler"
9649 attribute set. */
9650 bool
9651 sh_cfun_interrupt_handler_p (void)
9653 return (lookup_attribute ("interrupt_handler",
9654 DECL_ATTRIBUTES (current_function_decl))
9658 /* Returns true if FUNC has been assigned the attribute
9659 "function_vector". */
9661 sh2a_function_vector_p (tree func)
9664 if (TREE_CODE (func) != FUNCTION_DECL)
9667 list = SH_ATTRIBUTES (func);
9670 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9673 list = TREE_CHAIN (list);
9678 /* Returns true if given tree has the "resbank" attribute set. */
9680 sh_cfun_resbank_handler_p (void)
9682 return ((lookup_attribute ("resbank",
9683 DECL_ATTRIBUTES (current_function_decl))
9685 && (lookup_attribute ("interrupt_handler",
9686 DECL_ATTRIBUTES (current_function_decl))
9687 != NULL_TREE) && TARGET_SH2A);
9690 /* Returns true if the current function has a "trap_exit" attribute set. */
9692 sh_cfun_trap_exit_p (void)
9694 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9698 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9700 sh_check_pch_target_flags (int old_flags)
9702 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9703 | MASK_SH_E | MASK_HARD_SH4
9704 | MASK_FPU_SINGLE | MASK_SH4))
9705 return _("created and used with different architectures / ABIs");
9706 if ((old_flags ^ target_flags) & MASK_HITACHI)
9707 return _("created and used with different ABIs");
9708 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9709 return _("created and used with different endianness");
9713 /* Predicates used by the templates. */
9715 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9716 Used only in general_movsrc_operand. */
9718 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9730 /* Returns true if OP is a floating point value with value 0.0. */
9732 fp_zero_operand (rtx op)
9736 if (GET_MODE (op) != SFmode)
9739 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9740 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9743 /* Returns true if OP is a floating point value with value 1.0. */
9745 fp_one_operand (rtx op)
9749 if (GET_MODE (op) != SFmode)
9752 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9753 return REAL_VALUES_EQUAL (r, dconst1);
9756 /* In general mode switching is used. If we are
9757 compiling without -mfmovd, movsf_ie isn't taken into account for
9758 mode switching. We could check in machine_dependent_reorg for
9759 cases where we know we are in single precision mode, but there is
9760 no interface to find that out during reload, so we must avoid
9761 choosing an fldi alternative during reload and thus failing to
9762 allocate a scratch register for the constant loading. */
9769 /* Return the TLS type for TLS symbols. */
9771 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9773 if (GET_CODE (op) != SYMBOL_REF)
9774 return TLS_MODEL_NONE;
9775 return SYMBOL_REF_TLS_MODEL (op);
9778 /* Return the destination address of a branch. */
9780 branch_dest (rtx branch)
9782 rtx dest = SET_SRC (PATTERN (branch));
9785 if (GET_CODE (dest) == IF_THEN_ELSE)
9786 dest = XEXP (dest, 1);
9787 dest = XEXP (dest, 0);
9788 dest_uid = INSN_UID (dest);
9789 return INSN_ADDRESSES (dest_uid);
9792 /* Return nonzero if REG is not used after INSN.
9793 We assume REG is a reload reg, and therefore does
9794 not live past labels. It may live past calls or jumps though. */
9796 reg_unused_after (rtx reg, rtx insn)
9801 /* If the reg is set by this instruction, then it is safe for our
9802 case. Disregard the case where this is a store to memory, since
9803 we are checking a register used in the store address. */
9804 set = single_set (insn);
9805 if (set && !MEM_P (SET_DEST (set))
9806 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9809 while ((insn = NEXT_INSN (insn)))
9815 code = GET_CODE (insn);
9818 /* If this is a label that existed before reload, then the register
9819 is dead here. However, if this is a label added by reorg, then
9820 the register may still be live here. We can't tell the difference,
9821 so we just ignore labels completely. */
9822 if (code == CODE_LABEL)
9827 if (code == JUMP_INSN)
9830 /* If this is a sequence, we must handle them all at once.
9831 We could have for instance a call that sets the target register,
9832 and an insn in a delay slot that uses the register. In this case,
9833 we must return 0. */
9834 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9839 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9841 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9842 rtx set = single_set (this_insn);
9844 if (CALL_P (this_insn))
9846 else if (JUMP_P (this_insn))
9848 if (INSN_ANNULLED_BRANCH_P (this_insn))
9853 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9855 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9857 if (!MEM_P (SET_DEST (set)))
9863 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9868 else if (code == JUMP_INSN)
9872 set = single_set (insn);
9873 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9875 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9876 return !MEM_P (SET_DEST (set));
9877 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9880 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9888 static GTY(()) rtx t_reg_rtx;
9890 get_t_reg_rtx (void)
9893 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
9897 static GTY(()) rtx fpscr_rtx;
9899 get_fpscr_rtx (void)
9903 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9904 REG_USERVAR_P (fpscr_rtx) = 1;
9905 mark_user_reg (fpscr_rtx);
9907 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9908 mark_user_reg (fpscr_rtx);
9912 static GTY(()) tree fpscr_values;
9915 emit_fpu_switch (rtx scratch, int index)
9919 if (fpscr_values == NULL)
9923 t = build_index_type (integer_one_node);
9924 t = build_array_type (integer_type_node, t);
9925 t = build_decl (BUILTINS_LOCATION,
9926 VAR_DECL, get_identifier ("__fpscr_values"), t);
9927 DECL_ARTIFICIAL (t) = 1;
9928 DECL_IGNORED_P (t) = 1;
9929 DECL_EXTERNAL (t) = 1;
9930 TREE_STATIC (t) = 1;
9931 TREE_PUBLIC (t) = 1;
9937 src = DECL_RTL (fpscr_values);
9938 if (!can_create_pseudo_p ())
9940 emit_move_insn (scratch, XEXP (src, 0));
9942 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9943 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9946 src = adjust_address (src, PSImode, index * 4);
9948 dst = get_fpscr_rtx ();
9949 emit_move_insn (dst, src);
9953 emit_sf_insn (rtx pat)
9959 emit_df_insn (rtx pat)
9965 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9967 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9971 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9973 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9978 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9980 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9984 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9986 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9990 static rtx get_free_reg (HARD_REG_SET);
9992 /* This function returns a register to use to load the address from
9993 which to load the fpscr. Currently it always returns r1 or r7, but when we are
9994 able to use pseudo registers after combine, or have a better mechanism
9995 for choosing a register, it should be done here. */
9996 /* REGS_LIVE is the liveness information for the point for which we
9997 need this allocation. In some bare-bones exit blocks, r1 is live at the
9998 start. We can even have all of r0..r3 being live:
9999 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10000 The insn before which the new insns are placed will clobber the register
10001 we return. If a basic block consists only of setting the return value
10002 register to a pseudo and using that register, the return value is not
10003 live before or after this block, yet we'll insert our insns right in
10006 get_free_reg (HARD_REG_SET regs_live)
10008 if (! TEST_HARD_REG_BIT (regs_live, 1))
10009 return gen_rtx_REG (Pmode, 1);
10011 /* Hard reg 1 is live; since this is a small register classes target,
10012 there shouldn't be anything but a jump before the function end. */
10013 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10014 return gen_rtx_REG (Pmode, 7);
10017 /* This function will set the fpscr from memory.
10018 MODE is the mode we are setting it to. */
10020 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10022 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10023 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10026 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10027 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10030 /* Is the given character a logical line separator for the assembler? */
10031 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10032 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10036 sequence_insn_p (rtx insn)
10040 prev = PREV_INSN (insn);
10044 next = NEXT_INSN (prev);
10048 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10052 sh_insn_length_adjustment (rtx insn)
10054 /* Instructions with unfilled delay slots take up an extra two bytes for
10055 the nop in the delay slot. */
10056 if (((NONJUMP_INSN_P (insn)
10057 && GET_CODE (PATTERN (insn)) != USE
10058 && GET_CODE (PATTERN (insn)) != CLOBBER)
10059 || CALL_P (insn) || JUMP_P (insn))
10060 && ! sequence_insn_p (insn)
10061 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10064 /* SH2e has a bug that prevents the use of annulled branches, so if
10065 the delay slot is not filled, we'll have to put a NOP in it. */
10066 if (sh_cpu_attr == CPU_SH2E
10068 && get_attr_type (insn) == TYPE_CBRANCH
10069 && ! sequence_insn_p (insn))
10072 /* sh-dsp parallel processing insns take four bytes instead of two. */
10074 if (NONJUMP_INSN_P (insn))
10077 rtx body = PATTERN (insn);
10080 bool maybe_label = true;
10082 if (GET_CODE (body) == ASM_INPUT)
10083 templ = XSTR (body, 0);
10084 else if (asm_noperands (body) >= 0)
10086 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10091 int ppi_adjust = 0;
10095 while (c == ' ' || c == '\t');
10096 /* all sh-dsp parallel-processing insns start with p.
10097 The only non-ppi sh insn starting with p is pref.
10098 The only ppi starting with pr is prnd. */
10099 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10101 /* The repeat pseudo-insn expands to three insns, a total of
10102 six bytes in size. */
10103 else if ((c == 'r' || c == 'R')
10104 && ! strncasecmp ("epeat", templ, 5))
10106 while (c && c != '\n'
10107 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10109 /* If this is a label, it is obviously not a ppi insn. */
10110 if (c == ':' && maybe_label)
10115 else if (c == '\'' || c == '"')
10116 maybe_label = false;
10120 maybe_label = c != ':';
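/* Illustrative: in an inline asm such as

     asm ("padd  x0,y0,a0");

   the scan above sees a leading 'p' that does not start "pref", so
   two extra bytes are counted for the parallel-processing insn.  */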
10128 /* Return TRUE for a valid displacement for the REG+disp addressing
10129 with MODE. */
10130 bool
10131 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10134 if (! CONST_INT_P (op))
10137 if (TARGET_SHMEDIA)
10141 /* Check if this is the address of an unaligned load / store. */
10142 if (mode == VOIDmode)
10143 return satisfies_constraint_I06 (op);
10145 size = GET_MODE_SIZE (mode);
10146 return (!(INTVAL (op) & (size - 1))
10147 && INTVAL (op) >= -512 * size
10148 && INTVAL (op) < 512 * size);
10152 const HOST_WIDE_INT offset = INTVAL (op);
10153 const int max_disp = max_mov_insn_displacement (mode, consider_sh2a);
10154 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10156 /* If the mode does not support any displacement always return false.
10157 Even though an index of '0' is actually always valid, it will cause
10158 troubles when e.g. a DFmode move is split into two SFmode moves,
10159 where one SFmode move will have index '0' and the other move will
10160 have index '4'. */
10161 if (!allow_zero && max_disp < 1)
10164 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
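/* Worked example (illustrative): in SImode, mov.l @(disp,Rn) reaches
   displacements 0, 4, ..., 60, so an offset of 64 fails the max_disp
   check and an unaligned offset such as 6 fails the align_mask
   check.  */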
10168 /* Recognize an RTL expression that is a valid memory address for
10169 an instruction.
10170 The MODE argument is the machine mode for the MEM expression
10171 that wants to use this address.
10180 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10182 if (REG_P (x) && REGNO (x) == GBR_REG)
10185 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10187 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10188 && ! TARGET_SHMEDIA
10189 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10191 else if (GET_CODE (x) == PLUS
10192 && (mode != PSImode || reload_completed))
10194 rtx xop0 = XEXP (x, 0);
10195 rtx xop1 = XEXP (x, 1);
10197 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10198 return gbr_displacement (xop1, mode);
10200 if (GET_MODE_SIZE (mode) <= 8
10201 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10202 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10205 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10206 || ((xop0 == stack_pointer_rtx
10207 || xop0 == hard_frame_pointer_rtx)
10208 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10209 || ((xop1 == stack_pointer_rtx
10210 || xop1 == hard_frame_pointer_rtx)
10211 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10212 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10213 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10214 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10215 && TARGET_FMOVD && mode == DFmode)))
10217 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10218 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10220 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10221 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10229 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10230 isn't protected by a PIC unspec. */
10232 nonpic_symbol_mentioned_p (rtx x)
10237 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10238 || GET_CODE (x) == PC)
10241 /* We don't want to look into the possible MEM location of a
10242 CONST_DOUBLE, since we're not going to use it, in general. */
10243 if (GET_CODE (x) == CONST_DOUBLE)
10246 if (GET_CODE (x) == UNSPEC
10247 && (XINT (x, 1) == UNSPEC_PIC
10248 || XINT (x, 1) == UNSPEC_GOT
10249 || XINT (x, 1) == UNSPEC_GOTOFF
10250 || XINT (x, 1) == UNSPEC_GOTPLT
10251 || XINT (x, 1) == UNSPEC_GOTTPOFF
10252 || XINT (x, 1) == UNSPEC_DTPOFF
10253 || XINT (x, 1) == UNSPEC_TPOFF
10254 || XINT (x, 1) == UNSPEC_PLT
10255 || XINT (x, 1) == UNSPEC_SYMOFF
10256 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10259 fmt = GET_RTX_FORMAT (GET_CODE (x));
10260 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10265 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10266 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10269 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10276 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10277 @GOTOFF in `reg'. */
10279 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10282 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10285 if (GET_CODE (orig) == LABEL_REF
10286 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10288 if (reg == NULL_RTX)
10289 reg = gen_reg_rtx (Pmode);
10291 emit_insn (gen_symGOTOFF2reg (reg, orig));
10294 else if (GET_CODE (orig) == SYMBOL_REF)
10296 if (reg == NULL_RTX)
10297 reg = gen_reg_rtx (Pmode);
10299 emit_insn (gen_symGOT2reg (reg, orig));
10305 /* Given a (logical) mode size and an offset in bytes, try to find the
10306 appropriate displacement value for a mov insn. On SH the displacements
10307 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10308 15 bytes in QImode. To compensate for this we create a new base address by
10309 adding an adjustment value to it.
10311 If the originally requested offset is greater than 127 we prefer using
10312 values 124..127 over 128..131 to increase opportunities to use the
10313 add #imm, Rn insn.
10315 In some cases it is possible that a requested offset might seem unaligned
10316 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10317 This is compensated by adjusting the base address so that the effective
10318 address of the displacement move insn will be aligned.
10320 This is not the best possible way of rebasing the base address, as it
10321 does not look at other present displacement addressings around it.
10322 In some cases this can create more base address adjustments than would
10323 actually be necessary. */
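/* A worked example (illustrative, QImode, non-SH2A): a requested
   offset of 18 exceeds the 15 byte reach of the insn, so the address
   is rebased with offset_adjust = 16 and mov_disp = 2, giving e.g.

     add    #16,r1          ! apply offset_adjust to the base
     mov.b  @(2,r1),r0      ! use the remaining displacement

   the base register r1 is just for illustration; the actual register
   choice is made elsewhere.  */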
10330 static struct disp_adjust
10331 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10333 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10335 /* Do not try to use SH2A's large displacements here, because this would
10336 effectively disable the small displacement insns. */
10337 const int mode_sz = GET_MODE_SIZE (mode);
10338 const int mov_insn_sz = mov_insn_size (mode, false);
10339 const int max_disp = max_mov_insn_displacement (mode, false);
10340 const int max_disp_next = max_disp + mov_insn_sz;
10341 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10342 HOST_WIDE_INT offset_adjust;
10344 /* In some cases this actually does happen and we must check for it. */
10345 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10348 /* Keeps the previous behavior for QImode displacement addressing.
10349 This just decides how the offset is re-based. Removing this special
10350 case will result in slightly bigger code on average, but it's not that
10351 bad actually. */
10352 if (mov_insn_sz == 1)
10353 align_modifier = 0;
10355 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10357 if (mode_sz + offset - offset_adjust <= max_disp_next)
10359 res.offset_adjust = GEN_INT (offset_adjust);
10360 res.mov_disp = GEN_INT (offset - offset_adjust);
10366 /* Try to modify an illegitimate address and make it legitimate.
10367 If we find one, return the new, valid address.
10368 Otherwise, return the original address. */
10370 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10373 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10375 if (TARGET_SHMEDIA)
10378 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10379 || (TARGET_SH2E && mode == SFmode))
10382 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10383 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10385 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10386 INTVAL (XEXP (x, 1)));
10388 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10390 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10391 adj.offset_adjust, NULL_RTX, 0,
10393 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10400 /* Attempt to replace *p, which is an address that needs reloading, with
10401 a valid memory address for an operand of mode MODE.
10402 Like for sh_legitimize_address, for the SH we try to get a normal form
10403 of the address. That will allow inheritance of the address reloads. */
10405 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10408 enum reload_type type = (enum reload_type) itype;
10409 const int mode_sz = GET_MODE_SIZE (mode);
10411 if (TARGET_SHMEDIA)
10414 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10415 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10416 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10417 && (ALLOW_INDEXED_ADDRESS
10418 || XEXP (*p, 0) == stack_pointer_rtx
10419 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10421 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10422 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10424 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10426 push_reload (*p, NULL_RTX, p, NULL,
10427 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10431 if (TARGET_SH2E && mode == SFmode)
10433 *p = copy_rtx (*p);
10434 push_reload (*p, NULL_RTX, p, NULL,
10435 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10439 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10440 moves because then reload has a problem figuring the constraint
10441 that the move insn target/source reg must be R0.
10442 Or maybe some handling is wrong in sh_secondary_reload for this
10443 to work properly? */
10444 if ((mode_sz == 4 || mode_sz == 8)
10445 && ! (TARGET_SH4 && mode == DFmode)
10446 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10448 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10449 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10450 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10451 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10456 /* We must re-recognize what we created before. */
10457 if (GET_CODE (*p) == PLUS
10458 && (mode_sz == 4 || mode_sz == 8)
10459 && GET_CODE (XEXP (*p, 0)) == PLUS
10460 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10461 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10462 && CONST_INT_P (XEXP (*p, 1))
10463 && ! (TARGET_SH2E && mode == SFmode))
10465 /* Because this address is so complex, we know it must have
10466 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10467 it is already unshared, and needs no further unsharing. */
10468 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10469 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10476 /* In the name of slightly smaller debug output, and to cater to
10477 general assembler lossage, recognize various UNSPEC sequences
10478 and turn them back into a direct symbol reference. */
10480 sh_delegitimize_address (rtx orig_x)
10484 orig_x = delegitimize_mem_from_attrs (orig_x);
10489 if (GET_CODE (x) == CONST)
10492 if (GET_CODE (y) == UNSPEC)
10494 if (XINT (y, 1) == UNSPEC_GOT
10495 || XINT (y, 1) == UNSPEC_GOTOFF
10496 || XINT (y, 1) == UNSPEC_SYMOFF)
10497 return XVECEXP (y, 0, 0);
10498 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10500 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10502 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10504 if (GET_CODE (symplt) == UNSPEC
10505 && XINT (symplt, 1) == UNSPEC_PLT)
10506 return XVECEXP (symplt, 0, 0);
10509 else if (TARGET_SHMEDIA
10510 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10511 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10513 rtx offset = XVECEXP (y, 0, 1);
10515 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10516 if (MEM_P (orig_x))
10517 x = replace_equiv_address_nv (orig_x, x);
10526 /* Mark the use of a constant in the literal table. If the constant
10527 has multiple labels, make it unique. */
10529 mark_constant_pool_use (rtx x)
10531 rtx insn, lab, pattern;
10536 switch (GET_CODE (x))
10546 /* Get the first label in the list of labels for the same constant
10547 and delete the other labels in the list. */
10549 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10551 if (!LABEL_P (insn)
10552 || LABEL_REFS (insn) != NEXT_INSN (insn))
10557 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10558 INSN_DELETED_P (insn) = 1;
10560 /* Mark constants in a window. */
10561 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10563 if (!NONJUMP_INSN_P (insn))
10566 pattern = PATTERN (insn);
10567 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10570 switch (XINT (pattern, 1))
10572 case UNSPECV_CONST2:
10573 case UNSPECV_CONST4:
10574 case UNSPECV_CONST8:
10575 XVECEXP (pattern, 0, 1) = const1_rtx;
10577 case UNSPECV_WINDOW_END:
10578 if (XVECEXP (pattern, 0, 0) == x)
10581 case UNSPECV_CONST_END:
10591 /* Return true if it's possible to redirect BRANCH1 to the destination
10592 of an unconditional jump BRANCH2. We only want to do this if the
10593 resulting branch will have a short displacement. */
10595 sh_can_redirect_branch (rtx branch1, rtx branch2)
10597 if (flag_expensive_optimizations && simplejump_p (branch2))
10599 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10603 for (distance = 0, insn = NEXT_INSN (branch1);
10604 insn && distance < 256;
10605 insn = PREV_INSN (insn))
10610 distance += get_attr_length (insn);
10612 for (distance = 0, insn = NEXT_INSN (branch1);
10613 insn && distance < 256;
10614 insn = NEXT_INSN (insn))
10619 distance += get_attr_length (insn);
10625 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10627 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10628 unsigned int new_reg)
10630 /* Interrupt functions can only use registers that have already been
10631 saved by the prologue, even if they would normally be
10632 call-clobbered. */
10633 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10639 /* Function to update the integer COST
10640 based on the relationship between INSN that is dependent on
10641 DEP_INSN through the dependence LINK. The default is to make no
10642 adjustment to COST. This can be used for example to specify to
10643 the scheduler that an output- or anti-dependence does not incur
10644 the same cost as a data-dependence. The return value should be
10645 the new value for COST. */
10647 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10651 if (TARGET_SHMEDIA)
10653 /* On SHmedia, if the dependence is an anti-dependence or
10654 output-dependence, there is no cost. */
10655 if (REG_NOTE_KIND (link) != 0)
10657 /* However, dependencies between target register loads and
10658 uses of the register in a subsequent block that are separated
10659 by a conditional branch are not modelled - we have to make do with
10660 the anti-dependency between the target register load and the
10661 conditional branch that ends the current block. */
10662 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10663 && GET_CODE (PATTERN (dep_insn)) == SET
10664 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10665 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10666 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10668 int orig_cost = cost;
10669 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10670 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10671 ? insn : JUMP_LABEL (insn));
10672 /* On the likely path, the branch costs 1, on the unlikely path,
10673 it costs 3. */
10674 cost--;
10675 do
10676 target = next_active_insn (target);
10677 while (target && ! flow_dependent_p (target, dep_insn)
10678 && --cost > 0);
10679 /* If two branches are executed in immediate succession, with the
10680 first branch properly predicted, this causes a stall at the
10681 second branch, hence we won't need the target for the
10682 second branch for two cycles after the launch of the first
10684 if (cost > orig_cost - 2)
10685 cost = orig_cost - 2;
10691 else if (get_attr_is_mac_media (insn)
10692 && get_attr_is_mac_media (dep_insn))
10695 else if (! reload_completed
10696 && GET_CODE (PATTERN (insn)) == SET
10697 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10698 && GET_CODE (PATTERN (dep_insn)) == SET
10699 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10702 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10703 that is needed at the target. */
10704 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10705 && ! flow_dependent_p (insn, dep_insn))
10708 else if (REG_NOTE_KIND (link) == 0)
10710 enum attr_type type;
10713 if (recog_memoized (insn) < 0
10714 || recog_memoized (dep_insn) < 0)
10717 dep_set = single_set (dep_insn);
10719 /* The latency that we specify in the scheduling description refers
10720 to the actual output, not to an auto-increment register; for that,
10721 the latency is one. */
10722 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10724 rtx set = single_set (insn);
10727 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10728 && (!MEM_P (SET_DEST (set))
10729 || !reg_mentioned_p (SET_DEST (dep_set),
10730 XEXP (SET_DEST (set), 0))))
10733 /* The only input for a call that is timing-critical is the
10734 function's address. */
10737 rtx call = get_call_rtx_from (insn);
10739 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10740 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10741 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10742 cost -= TARGET_SH4_300 ? 3 : 6;
10744 /* Likewise, the most timing critical input for an sfuncs call
10745 is the function address. However, sfuncs typically start
10746 using their arguments pretty quickly.
10747 Assume a four cycle delay for SH4 before they are needed.
10748 Cached ST40-300 calls are quicker, so assume only a one
10749 cycle delay there.
10750 ??? Maybe we should encode the delays till input registers
10751 are needed by sfuncs into the sfunc call insn. */
10752 /* All sfunc calls are parallels with at least four components.
10753 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10754 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10755 && XVECLEN (PATTERN (insn), 0) >= 4
10756 && (reg = sfunc_uses_reg (insn)))
10758 if (! reg_set_p (reg, dep_insn))
10759 cost -= TARGET_SH4_300 ? 1 : 4;
      if (TARGET_HARD_SH4 && !TARGET_SH4_300)
	{
	  enum attr_type dep_type = get_attr_type (dep_insn);

	  if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
	    cost--;
	  else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
		   && (type = get_attr_type (insn)) != TYPE_CALL
		   && type != TYPE_SFUNC)
	    cost--;
	  /* When the preceding instruction loads the shift amount of
	     the following SHAD/SHLD, the latency of the load is increased
	     by 1 cycle.  */
	  if (get_attr_type (insn) == TYPE_DYN_SHIFT
	      && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
	      && reg_overlap_mentioned_p (SET_DEST (dep_set),
					  XEXP (SET_SRC (single_set (insn)),
						1)))
	    cost++;
	  /* When an LS group instruction with a latency of less than
	     3 cycles is followed by a double-precision floating-point
	     instruction, FIPR, or FTRV, the latency of the first
	     instruction is increased to 3 cycles.  */
	  else if (cost < 3
		   && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
		   && get_attr_dfp_comp (insn) == DFP_COMP_YES)
	    cost = 3;
	  /* The lsw register of a double-precision computation is ready one
	     cycle earlier.  */
	  else if (reload_completed
		   && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
		   && (use_pat = single_set (insn))
		   && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
				      SET_SRC (use_pat)))
	    cost -= 1;

	  if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
	      && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
	    cost -= 1;
	}
      else if (TARGET_SH4_300)
	{
	  /* Stores need their input register two cycles later.  */
	  if (dep_set && cost >= 1
	      && ((type = get_attr_type (insn)) == TYPE_STORE
		  || type == TYPE_PSTORE
		  || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
	    {
	      rtx set = single_set (insn);

	      if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
		  && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
		{
		  cost -= 2;
		  /* But don't reduce the cost below 1 if the address depends
		     on a side effect of dep_insn.  */
		  if (cost < 1
		      && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
		    cost = 1;
		}
	    }
	}
    }
  /* An anti-dependence penalty of two applies if the first insn is a double
     precision fadd / fsub / fmul.  */
  else if (!TARGET_SH4_300
	   && REG_NOTE_KIND (link) == REG_DEP_ANTI
	   && recog_memoized (dep_insn) >= 0
	   && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
	       || get_attr_type (dep_insn) == TYPE_DFP_MUL)
	   /* A lot of alleged anti-flow dependences are fake,
	      so check this one is real.  */
	   && flow_dependent_p (dep_insn, insn))
    cost = 2;

  return cost;
}

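/* Illustrative sketch (an assumed example, not from the original sources):
   in the call case above, for a pair such as

       mov.l  @r1,r4    ! dep_insn: sets an argument register
       jsr    @r0       ! insn: only r0, the call target, is timing-critical

   dep_insn does not set r0, so the dependence cost is cut by 6 (3 on
   SH4-300), letting the scheduler place the two insns close together.  */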
/* Check if INSN is flow-dependent on DEP_INSN.  Can also be used to check
   if DEP_INSN is anti-flow dependent on INSN.  */
static bool
flow_dependent_p (rtx insn, rtx dep_insn)
{
  rtx tmp = PATTERN (insn);

  note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
  return tmp == NULL_RTX;
}

/* A helper function for flow_dependent_p called through note_stores.  */
static void
flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
{
  rtx * pinsn = (rtx *) data;

  if (*pinsn && reg_referenced_p (x, *pinsn))
    *pinsn = NULL_RTX;
}

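/* Usage sketch (an assumed example, not from the original sources):
   note_stores walks every store in PATTERN (dep_insn) and the helper
   clears *pinsn as soon as a stored location is referenced by INSN, e.g.

       (set (reg:SI r1) (const_int 0))    ;; dep_insn
       (set (reg:SI r2) (reg:SI r1))      ;; insn references r1

   makes flow_dependent_p return true.  */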
/* For use by sh_allocate_initial_value.  Note that sh.md contains some
   'special function' patterns (type sfunc) that clobber pr, but that
   do not look like function calls to leaf_function_p.  Hence we must
   do this extra check.  */
static int
sh_pr_n_sets (void)
{
  return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
}

/* Return where to allocate pseudo for a given hard register initial
   value.  */
static rtx
sh_allocate_initial_value (rtx hard_reg)
{
  rtx x;

  if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
    {
      if (crtl->is_leaf
	  && ! sh_pr_n_sets ()
	  && ! (TARGET_SHCOMPACT
		&& ((crtl->args.info.call_cookie
		     & ~ CALL_COOKIE_RET_TRAMP (1))
		    || crtl->saves_all_registers)))
	x = hard_reg;
      else
	x = gen_frame_mem (Pmode, return_address_pointer_rtx);
    }
  else
    x = NULL_RTX;

  return x;
}

/* This function returns "2" to indicate dual issue for the SH4
   processor.  To be used by the DFA pipeline description.  */
static int
sh_issue_rate (void)
{
  if (TARGET_SUPERSCALAR)
    return 2;
  else
    return 1;
}

/* Functions for ready queue reordering for sched1.  */

/* Get weight for mode for a set x.  */
static short
find_set_regmode_weight (rtx x, enum machine_mode mode)
{
  if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
    return 1;
  if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
    {
      if (REG_P (SET_DEST (x)))
	{
	  if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
	    return 1;
	  else
	    return 0;
	}
      return 1;
    }
  return 0;
}

/* Get regmode weight for insn.  */
static short
find_insn_regmode_weight (rtx insn, enum machine_mode mode)
{
  short reg_weight = 0;
  rtx x;

  /* Increment weight for each register born here.  */
  x = PATTERN (insn);
  reg_weight += find_set_regmode_weight (x, mode);
  if (GET_CODE (x) == PARALLEL)
    {
      int j;
      for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
	{
	  x = XVECEXP (PATTERN (insn), 0, j);
	  reg_weight += find_set_regmode_weight (x, mode);
	}
    }
  /* Decrement weight for each register that dies here.  */
  for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
    {
      if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
	{
	  rtx note = XEXP (x, 0);
	  if (REG_P (note) && GET_MODE (note) == mode)
	    reg_weight--;
	}
    }
  return reg_weight;
}

/* Calculate regmode weights for all insns of a basic block.  */
static void
find_regmode_weight (basic_block b, enum machine_mode mode)
{
  rtx insn, next_tail, head, tail;

  get_ebb_head_tail (b, b, &head, &tail);
  next_tail = NEXT_INSN (tail);

  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    {
      /* Handle register life information.  */
      if (!INSN_P (insn))
	continue;

      if (mode == SFmode)
	INSN_REGMODE_WEIGHT (insn, mode) =
	  find_insn_regmode_weight (insn, mode)
	  + 2 * find_insn_regmode_weight (insn, DFmode);
      else if (mode == SImode)
	INSN_REGMODE_WEIGHT (insn, mode) =
	  find_insn_regmode_weight (insn, mode)
	  + 2 * find_insn_regmode_weight (insn, DImode);
    }
}

/* Comparison function for ready queue sorting.  */
static int
rank_for_reorder (const void *x, const void *y)
{
  rtx tmp = *(const rtx *) y;
  rtx tmp2 = *(const rtx *) x;

  /* The insn in a schedule group should be issued the first.  */
  if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
    return SCHED_GROUP_P (tmp2) ? 1 : -1;

  /* If insns are equally good, sort by INSN_LUID (original insn order);
     this minimizes instruction movement, thus minimizing sched's effect
     on register pressure.  */
  return INSN_LUID (tmp) - INSN_LUID (tmp2);
}

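/* For illustration (not from the original sources): rank_for_reorder is
   a qsort comparator over the rtx ready array.  Since the scheduler
   issues insns from the tail of the ready list, sorting ascending with
   this function leaves schedule-group members and low-LUID (early
   source-order) insns where they are picked first.  */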
/* Resort the array A in which only element at index N may be out of order.  */
static void
swap_reorder (rtx *a, int n)
{
  rtx insn = a[n - 1];
  int i = n - 2;

  while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
    {
      a[i + 1] = a[i];
      i -= 1;
    }
  a[i + 1] = insn;
}

/* Sort the ready list by ascending priority.  */
static void
ready_reorder (rtx *ready, int nready)
{
  if (nready == 2)
    swap_reorder (ready, nready);
  else if (nready > 2)
    qsort (ready, nready, sizeof (rtx), rank_for_reorder);
}

/* Count life regions of r0 for a block.  */
static int
find_r0_life_regions (basic_block b)
{
  rtx end, insn;
  rtx pset;
  rtx r0_reg;
  int live;
  int set;
  int death = 0;

  if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
    {
      set = 1;
      live = 1;
    }
  else
    {
      set = 0;
      live = 0;
    }

  insn = BB_HEAD (b);
  end = BB_END (b);
  r0_reg = gen_rtx_REG (SImode, R0_REG);
  while (1)
    {
      if (INSN_P (insn))
	{
	  if (find_regno_note (insn, REG_DEAD, R0_REG))
	    {
	      death++;
	      live = 0;
	    }
	  if (!live
	      && (pset = single_set (insn))
	      && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
	      && !find_regno_note (insn, REG_UNUSED, R0_REG))
	    {
	      set++;
	      live = 1;
	    }
	}
      if (insn == end)
	break;
      insn = NEXT_INSN (insn);
    }
  return set - death;
}

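/* Worked example (an assumed sequence, not from the original sources):
   for a block containing

       mov    #1,r0     ! r0 set            -> set = 1
       mov.l  r0,@r4    ! r0 dies (REG_DEAD) -> death = 1
       mov    #2,r0     ! r0 set again       -> set = 2

   the function returns set - death = 1: one r0 life region is still
   open at the block end.  The per-block counts accumulate into
   r0_life_regions, which is compared against R0_MAX_LIFE_REGIONS
   below.  */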
/* Calculate regmode weights for all insns of all basic blocks.  */
static void
sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
		   int verbose ATTRIBUTE_UNUSED,
		   int old_max_uid)
{
  basic_block b;

  regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
  regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
  r0_life_regions = 0;

  FOR_EACH_BB_REVERSE (b)
  {
    find_regmode_weight (b, SImode);
    find_regmode_weight (b, SFmode);
    if (!reload_completed)
      r0_life_regions += find_r0_life_regions (b);
  }

  CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}

/* Cleanup.  */
static void
sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
		     int verbose ATTRIBUTE_UNUSED)
{
  if (regmode_weight[0])
    {
      free (regmode_weight[0]);
      regmode_weight[0] = NULL;
    }
  if (regmode_weight[1])
    {
      free (regmode_weight[1]);
      regmode_weight[1] = NULL;
    }
}

/* The set of scalar modes supported differs from the default version
   only in TImode for 32-bit SHMEDIA.  */
static bool
sh_scalar_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SHMEDIA32 && mode == TImode)
    return false;

  return default_scalar_mode_supported_p (mode);
}

/* Cache the can_issue_more so that we can return it from reorder2.  Also,
   keep count of register pressures on SImode and SFmode.  */
static int
sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		   int sched_verbose ATTRIBUTE_UNUSED,
		   rtx insn,
		   int can_issue_more)
{
  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    cached_can_issue_more = can_issue_more - 1;
  else
    cached_can_issue_more = can_issue_more;

  if (reload_completed)
    return cached_can_issue_more;

  CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
  CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);

  return cached_can_issue_more;
}

static void
sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
	    int verbose ATTRIBUTE_UNUSED,
	    int veclen ATTRIBUTE_UNUSED)
{
  CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}

/* Some magic numbers.  */
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
   functions that already have high pressure on r0.  */
#define R0_MAX_LIFE_REGIONS 2
/* Register pressure thresholds for SImode and SFmode registers.  */
#define SIMODE_MAX_WEIGHT 5
#define SFMODE_MAX_WEIGHT 10

/* Return true if the pressure is high for MODE.  */
static bool
high_pressure (enum machine_mode mode)
{
  /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
     functions that already have high pressure on r0.  */
  if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
    return true;

  if (mode == SFmode)
    return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
  else
    return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
}

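/* Worked example (not from the original sources): with the thresholds
   above, once the running SImode weight of the scheduled insns exceeds 5
   (SIMODE_MAX_WEIGHT), or the SFmode weight exceeds 10 (SFMODE_MAX_WEIGHT),
   high_pressure returns true and sh_reorder below falls back to original
   source order, keeping sched1-induced register pressure - and hence
   spills - down.  */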
/* Reorder ready queue if register pressure is high.  */
static int
sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
	    int sched_verbose ATTRIBUTE_UNUSED,
	    rtx *ready,
	    int *n_readyp,
	    int clock_var ATTRIBUTE_UNUSED)
{
  if (reload_completed)
    return sh_issue_rate ();

  if (high_pressure (SFmode) || high_pressure (SImode))
    {
      ready_reorder (ready, *n_readyp);
    }

  return sh_issue_rate ();
}

/* Skip cycles if the current register pressure is high.  */
static int
sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
	     int sched_verbose ATTRIBUTE_UNUSED,
	     rtx *ready ATTRIBUTE_UNUSED,
	     int *n_readyp ATTRIBUTE_UNUSED,
	     int clock_var ATTRIBUTE_UNUSED)
{
  if (reload_completed)
    return cached_can_issue_more;

  if (high_pressure (SFmode) || high_pressure (SImode))
    skip_cycles = 1;

  return cached_can_issue_more;
}

/* Skip cycles without sorting the ready queue.  This will move insns from
   Q->R.  If this is the last cycle we are skipping, allow sorting of the
   ready queue by sh_reorder.  */

/* Generally, skipping this many cycles is sufficient for all insns to move
   from Q -> R.  */
#define MAX_SKIPS 8
static int
sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  rtx insn ATTRIBUTE_UNUSED,
		  int last_clock_var,
		  int clock_var,
		  int *sort_p)
{
  if (reload_completed)
    return 0;

  if (skip_cycles)
    {
      if ((clock_var - last_clock_var) < MAX_SKIPS)
	{
	  *sort_p = 0;
	  return 1;
	}
      /* If this is the last cycle we are skipping, allow reordering of R.  */
      if ((clock_var - last_clock_var) == MAX_SKIPS)
	{
	  *sort_p = 1;
	  return 1;
	}
    }

  skip_cycles = 0;

  return 0;
}

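/* Timeline sketch (not from the original sources): after sh_reorder2
   sets skip_cycles, sh_dfa_new_cycle returns 1 with *sort_p = 0 for up
   to MAX_SKIPS - 1 consecutive cycles, letting queued insns migrate from
   Q to R unsorted; on the MAX_SKIPS-th cycle it still skips, but sets
   *sort_p = 1 so the now-complete ready list is reordered once by
   sh_reorder.  */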
/* SHmedia requires registers for branches, so we can't generate new
   branches past reload.  */
static bool
sh_cannot_modify_jumps_p (void)
{
  return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
}

static reg_class_t
sh_target_reg_class (void)
{
  return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
}

static bool
sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
{
  if (! shmedia_space_reserved_for_target_registers)
    return 0;
  if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
    return 0;

  HARD_REG_SET dummy;
  if (calc_live_regs (&dummy) >= 6 * 8)
    return 1;
  return 0;
}

static bool
sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
{
  return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
}

/*
   On the SH1..SH4, the trampoline looks like
   2 0002 D202     		mov.l	l2,r2
   1 0000 D301     		mov.l	l1,r3
   3 0004 422B     		jmp	@r2
   4 0006 0009     		nop
   5 0008 00000000 	l1:  	.long   area
   6 000c 00000000 	l2:	.long   function

   SH5 (compact) uses r1 instead of r3 for the static chain.  */
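/* For illustration (hypothetical user code, not from the original
   sources): a trampoline like the above is materialized on the stack
   when a nested function's address escapes, e.g.

       int f (int i)
       {
	 int g (int j) { return i + j; }   -- needs f's frame (cxt)
	 return apply (g);                 -- g's address escapes
       }

   l1/area then receives the static chain value and l2/function the real
   code address of g.  */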
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
static void
sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));

  if (TARGET_SHMEDIA64)
    {
      rtx tramp_templ;
      int fixed_len;

      rtx movi1 = GEN_INT (0xcc000010);
      rtx shori1 = GEN_INT (0xc8000010);
      rtx src, dst;

      /* The following trampoline works within a +- 128 KB range for cxt:
	 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
	 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
	 gettr tr1,r1; blink tr0,r63  */
      /* Address rounding makes it hard to compute the exact bounds of the
	 offset for this trampoline, but we have a rather generous offset
	 range, so frame_offset should do fine as an upper bound.  */
      if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
	{
	  /* ??? could optimize this trampoline initialization
	     by writing DImode words with two insns each.  */
	  rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
	  rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
	  insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  /* Or in ptb/u .,tr1 pattern */
	  insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
	  insn = force_operand (insn, NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
	  insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
	  emit_move_insn (adjust_address (tramp_mem, SImode, 20),
			  GEN_INT (0x6bf10600));
	  emit_move_insn (adjust_address (tramp_mem, SImode, 24),
			  GEN_INT (0x4415fc10));
	  emit_move_insn (adjust_address (tramp_mem, SImode, 28),
			  GEN_INT (0x4401fff0));
	  emit_insn (gen_ic_invalidate_line (tramp));
	  return;
	}
      tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
      fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);

      tramp_templ = gen_datalabel_ref (tramp_templ);
      dst = tramp_mem;
      src = gen_const_mem (BLKmode, tramp_templ);
      set_mem_align (dst, 256);
      set_mem_align (src, 64);
      emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);

      emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
      emit_move_insn (adjust_address (tramp_mem, Pmode,
				      fixed_len + GET_MODE_SIZE (Pmode)),
		      cxt);
      emit_insn (gen_ic_invalidate_line (tramp));
      return;
    }
  else if (TARGET_SHMEDIA)
    {
      /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
	 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63  */
      rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
      rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
      /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010  concatenated,
	 rotated 10 right, and higher 16 bit of every 32 selected.  */
      rtx movishori
	= force_reg (V2HImode, (simplify_gen_subreg
				(V2HImode, GEN_INT (0x4330432), SImode, 0)));
      rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
      rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));

      fnaddr = force_reg (SImode, fnaddr);
      cxt = force_reg (SImode, cxt);
      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
				 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
				 movishori));
      emit_insn (gen_rotrdi3_mextr (quad0, quad0,
				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
      emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
      emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
				 gen_rtx_SUBREG (V2HImode, cxt, 0),
				 movishori));
      emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
      emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
      if (TARGET_LITTLE_ENDIAN)
	{
	  emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
	  emit_insn (gen_mextr4 (quad2, cxtload, blink));
	}
      else
	{
	  emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
	  emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
	}
      emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
      emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
      emit_insn (gen_ic_invalidate_line (tramp));
      return;
    }
  else if (TARGET_SHCOMPACT)
    {
      emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
      return;
    }
  emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
				SImode));
  emit_move_insn (adjust_address (tramp_mem, SImode, 4),
		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
				SImode));
  emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
  emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
  if (TARGET_HARD_SH4 || TARGET_SH5)
    {
      if (!TARGET_INLINE_IC_INVALIDATE
	  || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
	emit_library_call (function_symbol (NULL, "__ic_invalidate",
					    FUNCTION_ORDINARY),
			   LCT_NORMAL, VOIDmode, 1, tramp, SImode);
      else
	emit_insn (gen_ic_invalidate_line (tramp));
    }
}

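/* Decoding sketch of the SH1..SH4 trampoline words stored above
   (illustrative, not from the original sources):

       0xd202  mov.l  @(2,pc),r2   - 1101nnnndddddddd, n=2, disp=2 -> l2
       0xd301  mov.l  @(1,pc),r3   - n=3, disp=1 -> l1
       0x422b  jmp    @r2          - 0100nnnn00101011, n=2
       0x0009  nop                 - fills the jmp delay slot

   gen_int_mode packs two 16-bit insns per SImode word, with the halves
   swapped between little and big endian so each insn lands at its slot
   in the layout comment further up.  */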
/* On SH5, trampolines are SHmedia code, so add 1 to the address.  */
static rtx
sh_trampoline_adjust_address (rtx tramp)
{
  if (TARGET_SHMEDIA)
    tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
				 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
  return tramp;
}

/* FIXME: This is overly conservative.  A SHcompact function that
   receives arguments ``by reference'' will have them stored in its
   own stack frame, so it must not pass pointers or references to
   these arguments to other functions by means of sibling calls.  */
/* If PIC, we cannot make sibling calls to global functions
   because the PLT requires r12 to be live.  */
static bool
sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return (1
	  && (! TARGET_SHCOMPACT
	      || crtl->args.info.stack_regs == 0)
	  && ! sh_cfun_interrupt_handler_p ()
	  && (! flag_pic
	      || (decl && ! TREE_PUBLIC (decl))
	      || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}

/* Machine specific built-in functions.  */

struct builtin_description
{
  bool (* const is_enabled) (void);
  const enum insn_code icode;
  const char *const name;
  int signature;
  tree fndecl;
};

static bool
shmedia_builtin_p (void)
{
  return TARGET_SHMEDIA;
}

/* This function can be used if there are any built-ins that are not for
   SHmedia.  It's commented out to avoid the defined-but-unused warning.
static bool
sh1_builtin_p (void)
{
  return TARGET_SH1;
}
*/

/* describe number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument)  */
/* 9: 64-bit pointer, 10: 32-bit pointer */
static const char signature_args[][4] =
{
#define SH_BLTIN_V2SI2 0
  { 4, 4 },
#define SH_BLTIN_V4HI2 1
  { 4, 4 },
#define SH_BLTIN_V2SI3 2
  { 4, 4, 4 },
#define SH_BLTIN_V4HI3 3
  { 4, 4, 4 },
#define SH_BLTIN_V8QI3 4
  { 4, 4, 4 },
#define SH_BLTIN_MAC_HISI 5
  { 1, 1, 1, 1 },
#define SH_BLTIN_SH_HI 6
  { 4, 4, 1 },
#define SH_BLTIN_SH_SI 7
  { 4, 4, 1 },
#define SH_BLTIN_V4HI2V2SI 8
  { 4, 4, 4 },
#define SH_BLTIN_V4HI2V8QI 9
  { 4, 4, 4 },
#define SH_BLTIN_SISF 10
  { 4, 2 },
#define SH_BLTIN_LDUA_L 11
  { 2, 10 },
#define SH_BLTIN_LDUA_Q 12
  { 1, 10 },
#define SH_BLTIN_STUA_L 13
  { 0, 10, 2 },
#define SH_BLTIN_STUA_Q 14
  { 0, 10, 1 },
#define SH_BLTIN_LDUA_L64 15
  { 2, 9 },
#define SH_BLTIN_LDUA_Q64 16
  { 1, 9 },
#define SH_BLTIN_STUA_L64 17
  { 0, 9, 2 },
#define SH_BLTIN_STUA_Q64 18
  { 0, 9, 1 },
#define SH_BLTIN_NUM_SHARED_SIGNATURES 19
#define SH_BLTIN_2 19
#define SH_BLTIN_SU 19
  { 1, 2 },
#define SH_BLTIN_3 20
#define SH_BLTIN_SUS 20
  { 2, 2, 1 },
#define SH_BLTIN_PSSV 21
  { 0, 8, 2, 2 },
#define SH_BLTIN_XXUU 22
#define SH_BLTIN_UUUU 22
  { 1, 1, 1, 1 },
#define SH_BLTIN_PV 23
  { 0, 8 },
#define SH_BLTIN_VP 24
  { 8, 0 },
};
/* mcmv: operands considered unsigned.  */
/* mmulsum_wq, msad_ubq: result considered unsigned long long.  */
/* mperm: control value considered unsigned int.  */
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int.  */
/* mshards_q: returns signed short.  */
/* nsb: takes long long arg, returns unsigned char.  */
static struct builtin_description bdesc[] =
{
  { shmedia_builtin_p,
    CODE_FOR_absv2si2,	"__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
  { shmedia_builtin_p,
    CODE_FOR_absv4hi2,	"__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
  { shmedia_builtin_p,
    CODE_FOR_addv2si3,	"__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_addv4hi3,	"__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_alloco_i,	"__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
  { shmedia_builtin_p,
    CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mcmv,	"__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mcnvs_lw,	"__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mcnvs_wb,	"__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mcnvs_wub,	"__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mextr1,	"__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mextr2,	"__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mextr3,	"__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mextr4,	"__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mextr5,	"__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mextr6,	"__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mextr7,	"__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mmacfx_wl,	"__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mulv2si3,	"__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mulv4hi3,	"__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mmulfx_l,	"__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mmulfx_w,	"__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mmulhi_wl,	"__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mmullo_wl,	"__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mperm_w,	"__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_msad_ubq,	"__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mshalds_l,	"__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mshalds_w,	"__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ashrv2si3,	"__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ashrv4hi3,	"__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mshards_q,	"__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mshfhi_b,	"__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mshfhi_l,	"__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mshfhi_w,	"__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mshflo_b,	"__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mshflo_l,	"__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_mshflo_w,	"__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ashlv2si3,	"__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ashlv4hi3,	"__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_lshrv2si3,	"__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_lshrv4hi3,	"__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
  { shmedia_builtin_p,
    CODE_FOR_subv2si3,	"__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_subv4hi3,	"__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_fcosa_s,	"__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
  { shmedia_builtin_p,
    CODE_FOR_fsina_s,	"__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
  { shmedia_builtin_p,
    CODE_FOR_fipr,	"__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ftrv,	"__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
  { shmedia_builtin_p,
    CODE_FOR_sqrtdf2,	"__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
  { shmedia_builtin_p,
    CODE_FOR_sqrtsf2,	"__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
  { shmedia_builtin_p,
    CODE_FOR_fsrra_s,	"__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ldhi_l,	"__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ldhi_q,	"__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ldlo_l,	"__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ldlo_q,	"__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
  { shmedia_builtin_p,
    CODE_FOR_sthi_l,	"__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
  { shmedia_builtin_p,
    CODE_FOR_sthi_q,	"__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
  { shmedia_builtin_p,
    CODE_FOR_stlo_l,	"__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
  { shmedia_builtin_p,
    CODE_FOR_stlo_q,	"__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ldhi_l64,	"__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ldhi_q64,	"__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ldlo_l64,	"__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
  { shmedia_builtin_p,
    CODE_FOR_ldlo_q64,	"__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
  { shmedia_builtin_p,
    CODE_FOR_sthi_l64,	"__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
  { shmedia_builtin_p,
    CODE_FOR_sthi_q64,	"__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
  { shmedia_builtin_p,
    CODE_FOR_stlo_l64,	"__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
  { shmedia_builtin_p,
    CODE_FOR_stlo_q64,	"__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
  { shmedia_builtin_p,
    CODE_FOR_nsb,	"__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
  { shmedia_builtin_p,
    CODE_FOR_byterev,	"__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
  { shmedia_builtin_p,
    CODE_FOR_prefetch,	"__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
};

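/* Usage sketch (hypothetical user code, not from the original sources):
   each table entry maps a builtin name to its insn; e.g. with SHmedia
   enabled,

       typedef int v2si __attribute__ ((vector_size (8)));
       v2si f (v2si a, v2si b) { return __builtin_addv2si3 (a, b); }

   expands through CODE_FOR_addv2si3 via sh_expand_builtin below.  */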
static void
sh_init_builtins (void)
{
  tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
  memset (shared, 0, sizeof shared);

  for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
    {
      builtin_description* d = &bdesc[di];

      if (!d->is_enabled ())
	continue;

      tree type, arg_type = NULL_TREE;
      int signature = d->signature;

      if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
	type = shared[signature];
      else
	{
	  int has_result = signature_args[signature][0] != 0;
	  tree args[3];

	  if ((signature_args[signature][1] & 8)
	      && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
		  || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
	    continue;
	  if (! TARGET_FPU_ANY
	      && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
	    continue;
	  for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
	    args[i] = NULL_TREE;
	  for (int i = 3; ; i--)
	    {
	      int arg = signature_args[signature][i];
	      int opno = i - 1 + has_result;

	      if (arg & 8)
		arg_type = ptr_type_node;
	      else if (arg)
		arg_type = (*lang_hooks.types.type_for_mode)
		  (insn_data[d->icode].operand[opno].mode, (arg & 1));
	      else if (i)
		continue;
	      else
		arg_type = void_type_node;
	      if (i == 0)
		break;
	      args[i-1] = arg_type;
	    }
	  type = build_function_type_list (arg_type, args[0], args[1],
					   args[2], NULL_TREE);
	  if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
	    shared[signature] = type;
	}
      d->fndecl =
	add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
			      NULL, NULL_TREE);
    }
}

/* Implements target hook vector_mode_supported_p.  */
static bool
sh_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_FPU_ANY
      && ((mode == V2SFmode)
	  || (mode == V4SFmode)
	  || (mode == V16SFmode)))
    return true;

  else if (TARGET_SHMEDIA
	   && ((mode == V8QImode)
	       || (mode == V2HImode)
	       || (mode == V4HImode)
	       || (mode == V2SImode)))
    return true;

  else
    return false;
}

static bool
sh_frame_pointer_required (void)
{
  /* If needed override this in other tm.h files to cope with various OS
     lossage requiring a frame pointer.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  if (crtl->profile)
    return true;

  return false;
}

/* Implements target hook dwarf_calling_convention.  Return an enum
   of dwarf_calling_convention.  */
int
sh_dwarf_calling_convention (const_tree func)
{
  if (sh_attr_renesas_p (func))
    return DW_CC_GNU_renesas_sh;

  return DW_CC_normal;
}

/* Returns the sh builtin decl for CODE.  */
static tree
sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARRAY_SIZE (bdesc))
    return error_mark_node;

  if (!bdesc[code].is_enabled ())
    return error_mark_node;

  return bdesc[code].fndecl;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */
static rtx
sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d = &bdesc[fcode];
  enum insn_code icode = d->icode;
  int signature = d->signature;
  int nop = 0;
  rtx op[4];

  if (signature_args[signature][0])
    {
      if (ignore)
	return NULL_RTX;

      enum machine_mode tmode = insn_data[icode].operand[0].mode;
      if (! target || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[nop++] = target;
    }
  else
    target = NULL_RTX;

  for (int i = 1; i <= 3; i++, nop++)
    {
      tree arg;
      enum machine_mode opmode, argmode;
      tree optype;

      if (! signature_args[signature][i])
	break;
      arg = CALL_EXPR_ARG (exp, i - 1);
      if (arg == error_mark_node)
	return const0_rtx;
      if (signature_args[signature][i] & 8)
	{
	  opmode = ptr_mode;
	  optype = ptr_type_node;
	}
      else
	{
	  opmode = insn_data[icode].operand[nop].mode;
	  optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
	}
      argmode = TYPE_MODE (TREE_TYPE (arg));
      if (argmode != opmode)
	arg = build1 (NOP_EXPR, optype, arg);
      op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
      if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
	op[nop] = copy_to_mode_reg (opmode, op[nop]);
    }

  rtx pat = NULL_RTX;

  switch (nop)
    {
    case 1:
      pat = (*insn_data[d->icode].genfun) (op[0]);
      break;
    case 2:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
      break;
    case 3:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }
  if (! pat)
    return NULL_RTX;
  emit_insn (pat);
  return target;
}

static void
sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
  rtx op = gen_rtx_fmt_e (code, SFmode, op1);

  emit_insn ((*fn) (op0, op1, op, sel0, sel0));
  emit_insn ((*fn) (op0, op1, op, sel1, sel1));
}

static void
sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
{
  rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);

  emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
  emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
}

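/* Expansion sketch (not from the original sources): for a V2SFmode
   addition, sh_expand_binop_v2sf (PLUS, op0, op1, op2) emits one
   single-precision PLUS per vector lane, roughly

       lane 0:  op0[0] = op1[0] + op2[0]   (gen_binary_sf_op0)
       lane 1:  op0[1] = op1[1] + op2[1]   (gen_binary_sf_op1)

   so two-element SF vectors are handled without real vector insns.  */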
/* Return true if hard register REGNO can hold a value of machine-mode MODE.
   We can allow any mode in any general register.  The special registers
   only allow SImode.  Don't allow any mode in the PR.

   We cannot hold DCmode values in the XD registers because alter_reg
   handles subregs of them incorrectly.  We could work around this by
   spacing the XD registers like the DR registers, but this would require
   additional memory in every compilation to hold larger register vectors.
   We could hold SFmode / SCmode values in XD registers, but that
   would require a tertiary reload when reloading from / to memory,
   and a secondary reload to reload from / to general regs; that
   seems to be a losing proposition.

   We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
   it won't be ferried through GP registers first.  */
static bool
sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (SPECIAL_REGISTER_P (regno))
    return mode == SImode;

  if (regno == FPUL_REG)
    return (mode == SImode || mode == SFmode);

  if (FP_REGISTER_P (regno) && mode == SFmode)
    return true;

  if (mode == V2SFmode)
    {
      if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
	   || GENERAL_REGISTER_P (regno)))
	return true;
      else
	return false;
    }

  if (mode == V4SFmode)
    {
      if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
	  || GENERAL_REGISTER_P (regno))
	return true;
      else
	return false;
    }

  if (mode == V16SFmode)
    {
      if (TARGET_SHMEDIA)
	{
	  if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
	    return true;
	  else
	    return false;
	}
      else
	return regno == FIRST_XD_REG;
    }

  if (FP_REGISTER_P (regno))
    {
      if (mode == SFmode
	  || mode == SImode
	  || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
	  || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
	       || mode == DCmode
	       || (TARGET_SHMEDIA
		   && (mode == DFmode || mode == DImode
		       || mode == V2SFmode || mode == TImode)))
	      && ((regno - FIRST_FP_REG) & 1) == 0)
	  || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
	      && ((regno - FIRST_FP_REG) & 3) == 0))
	return true;
      else
	return false;
    }

  if (XD_REGISTER_P (regno))
    return mode == DFmode;

  if (TARGET_REGISTER_P (regno))
    return (mode == DImode || mode == SImode || mode == PDImode);

  if (regno == PR_REG)
    return mode == SImode;

  if (regno == FPSCR_REG)
    return mode == PSImode;

  /* FIXME.  This works around PR target/37633 for -O0.  */
  if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
    {
      unsigned int n = GET_MODE_SIZE (mode) / 8;

      if (regno >= FIRST_GENERAL_REG + 10 - n + 1
	  && regno <= FIRST_GENERAL_REG + 14)
	return false;
    }

  return true;
}

/* Return the class of registers for which a mode change from FROM to TO
   is invalid.  */
static bool
sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			     enum reg_class rclass)
{
  /* We want to enable the use of SUBREGs as a means to
     VEC_SELECT a single element of a vector.  */

  /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
     This can be problematic when SFmode vector subregs need to be accessed
     on the stack with displacement addressing, as it happens with -O0.
     Thus we disallow the mode change for -O0.  */
  if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
    return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;

  if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
    {
      if (TARGET_LITTLE_ENDIAN)
	{
	  if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_REGS, rclass);
	}
      else
	{
	  if (GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_REGS, rclass);
	}
    }
  return false;
}

/* Return true if registers in machine mode MODE will likely be
   allocated to registers in small register classes.  */
static bool
sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (! TARGET_SHMEDIA);
}

/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
   that label is used.  */
void
sh_mark_label (rtx address, int nuses)
{
  if (GOTOFF_P (address))
    {
      /* Extract the label or symbol.  */
      address = XEXP (address, 0);
      if (GET_CODE (address) == PLUS)
	address = XEXP (address, 0);
      address = XVECEXP (address, 0, 0);
    }
  if (GET_CODE (address) == LABEL_REF
      && LABEL_P (XEXP (address, 0)))
    LABEL_NUSES (XEXP (address, 0)) += nuses;
}

/* Compute extra cost of moving data between one register class
   and another.

   If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
   uses this information.  Hence, the general register <-> floating point
   register information here is not used for SFmode.  */
static int
sh_register_move_cost (enum machine_mode mode,
		       reg_class_t srcclass, reg_class_t dstclass)
{
  if (dstclass == T_REGS || dstclass == PR_REGS)
    return 10;

  if (dstclass == MAC_REGS && srcclass == MAC_REGS)
    return 4;

  if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
      && REGCLASS_HAS_FP_REG (srcclass)
      && REGCLASS_HAS_FP_REG (dstclass))
    return 4;

  if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
    return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);

  if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
      || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
    return 9;

  if ((REGCLASS_HAS_FP_REG (dstclass)
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (REGCLASS_HAS_GENERAL_REG (dstclass)
	  && REGCLASS_HAS_FP_REG (srcclass)))
    {
      /* Discourage trying to use fp regs for a pointer.  This also
	 discourages fp regs with SImode because Pmode is an alias
	 of SImode on this target.  See PR target/48596.  */
      int addend = (mode == Pmode) ? 40 : 0;

      return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
	      * ((GET_MODE_SIZE (mode) + 7) / 8U));
    }

  if ((dstclass == FPUL_REGS
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (srcclass == FPUL_REGS
	  && REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 5;

  if ((dstclass == FPUL_REGS
       && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
      || (srcclass == FPUL_REGS
	  && (dstclass == PR_REGS || dstclass == MAC_REGS)))
    return 7;

  if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 20;

  /* ??? ptabs faults on (value & 0x3) == 0x3  */
  if (TARGET_SHMEDIA
      && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
    {
      if (sh_gettrcost >= 0)
	return sh_gettrcost;
      else if (!TARGET_PT_FIXED)
	return 100;
    }

  if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 4;

  if (TARGET_SHMEDIA
      || (TARGET_FMOVD
	  && ! REGCLASS_HAS_GENERAL_REG (srcclass)
	  && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);

  return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
}

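/* Worked example (not from the original sources): moving an SImode
   pointer between GENERAL_REGS and FP_REGS on SH4 without -mfmovd costs
   (12 + 40) * ((4 + 7) / 8) = 52 by the formula above - the 40-point
   addend for mode == Pmode is what steers register allocation away from
   keeping pointers in floating-point registers.  */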
static rtx
emit_load_ptr (rtx reg, rtx addr)
{
  rtx mem = gen_const_mem (ptr_mode, addr);

  if (Pmode != ptr_mode)
    mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
  return emit_move_insn (reg, mem);
}

static void
sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		    HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		    tree function)
{
  CUMULATIVE_ARGS cum;
  int structure_value_byref = 0;
  rtx this_rtx, this_value, sibcall, insns, funexp;
  tree funtype = TREE_TYPE (function);
  int simple_add = CONST_OK_FOR_ADD (delta);
  int did_load = 0;
  rtx scratch0, scratch1, scratch2;
  unsigned i;

  reload_completed = 1;
  epilogue_completed = 1;
  crtl->uses_only_leaf_regs = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  We have such a wide range of ABIs for the
     SH that it's best to do this completely machine independently.
     "this" is passed as first argument, unless a structure return pointer
     comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
    }
  this_rtx
    = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
     -ffixed-reg has been used.  */
  if (! call_used_regs[0] || fixed_regs[0])
    error ("r0 needs to be available as a call-clobbered register");
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
  if (! TARGET_SH5)
    {
      if (call_used_regs[1] && ! fixed_regs[1])
	scratch1 = gen_rtx_REG (ptr_mode, 1);
      /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
	 pointing where to return struct values.  */
      if (call_used_regs[3] && ! fixed_regs[3])
	scratch2 = gen_rtx_REG (Pmode, 3);
    }
  else if (TARGET_SHMEDIA)
    {
      for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
	if (i != REGNO (scratch0) &&
	    call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
	  {
	    scratch1 = gen_rtx_REG (ptr_mode, i);
	    break;
	  }
      if (scratch1 == scratch0)
	error ("need a second call-clobbered general purpose register");
      for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
	if (call_used_regs[i] && ! fixed_regs[i])
	  {
	    scratch2 = gen_rtx_REG (Pmode, i);
	    break;
	  }
      if (scratch2 == scratch0)
	error ("need a call-clobbered target register");
    }

  this_value = plus_constant (Pmode, this_rtx, delta);
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this_rtx, this_value);
  else
    {
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this_rtx, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
	emit_load_ptr (scratch0, this_rtx);

      offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
	; /* Do nothing.  */
      else if (! TARGET_SH5 && scratch0 != scratch1)
	{
	  /* scratch0 != scratch1, and we have indexed loads.  Get better
	     schedule by loading the offset into r1 and using an indexed
	     load - then the load of r1 can issue before the load from
	     (this_rtx + delta) finishes.  */
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
	}
      else if (CONST_OK_FOR_ADD (vcall_offset))
	{
	  emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
	  offset_addr = scratch0;
	}
      else if (scratch0 != scratch1)
	{
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  emit_insn (gen_add2_insn (scratch0, scratch1));
	  offset_addr = scratch0;
	}
      else
	gcc_unreachable (); /* FIXME */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
	scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this_rtx, scratch0));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  /* If the function is overridden, so is the thunk, hence we don't
     need GOT addressing even if this is a public symbol.  */
#if 1
  if (TARGET_SH1 && ! flag_weak)
    sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
  else
#endif
  if (TARGET_SH2 && flag_pic)
    {
      sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
      XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
    }
  else
    {
      if (TARGET_SHMEDIA && flag_pic)
	{
	  funexp = gen_sym2PIC (funexp);
	  PUT_MODE (funexp, Pmode);
	}
      emit_move_insn (scratch2, funexp);
      funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
      sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
    }
  sibcall = emit_call_insn (sibcall);
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insns = get_insns ();

  if (optimize > 0)
    {
      if (! cfun->cfg)
	init_flow (cfun);
      split_all_insns_noflow ();
    }

  sh_reorg ();
  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}

static rtx
function_symbol (rtx target, const char *name, enum sh_function_kind kind)
{
  rtx sym;

  /* If this is not an ordinary function, the name usually comes from a
     string literal or an sprintf buffer.  Make sure we use the same
     string consistently, so that cse will be able to unify address loads.  */
  if (kind != FUNCTION_ORDINARY)
    name = IDENTIFIER_POINTER (get_identifier (name));
  sym = gen_rtx_SYMBOL_REF (Pmode, name);
  SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
  if (flag_pic)
    switch (kind)
      {
      case FUNCTION_ORDINARY:
	break;
      case SFUNC_GOT:
	{
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  emit_insn (gen_symGOT2reg (reg, sym));
	  sym = reg;
	  break;
	}
      case SFUNC_STATIC:
	{
	  /* ??? To allow cse to work, we use GOTOFF relocations.
	     We could add combiner patterns to transform this into
	     straight pc-relative calls with sym2PIC / bsrf when
	     label load and function call are still 1:1 and in the
	     same basic block during combine.  */
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  emit_insn (gen_symGOTOFF2reg (reg, sym));
	  sym = reg;
	  break;
	}
      }
  if (target && sym != target)
    {
      emit_move_insn (target, sym);
      return target;
    }
  return sym;
}

/* Find the number of a general purpose register in S.  */
static int
scavenge_reg (HARD_REG_SET *s)
{
  int r;
  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
    if (TEST_HARD_REG_BIT (*s, r))
      return r;
  return -1;
}

rtx
sh_get_pr_initial_val (void)
{
  rtx val;

  /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
     PR register on SHcompact, because it might be clobbered by the prologue.
     We check first if that is known to be the case.  */
  if (TARGET_SHCOMPACT
      && ((crtl->args.info.call_cookie
	   & ~ CALL_COOKIE_RET_TRAMP (1))
	  || crtl->saves_all_registers))
    return gen_frame_mem (SImode, return_address_pointer_rtx);

  /* If we haven't finished rtl generation, there might be a nonlocal label
     that we haven't seen yet.
     ??? get_hard_reg_initial_val fails if it is called after register
     allocation has started, unless it has been called before for the
     same register.  And even then, we end in trouble if we didn't use
     the register in the same basic block before.  So call
     get_hard_reg_initial_val now and wrap it in an unspec if we might
     need to replace it.  */
  /* ??? We also must do this for TARGET_SH1 in general, because otherwise
     combine can put the pseudo returned by get_hard_reg_initial_val into
     instructions that need a general purpose register, which will fail to
     be recognized when the pseudo becomes allocated to PR.  */
  val
    = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
  if (TARGET_SH1)
    return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
  return val;
}

bool
sh_expand_t_scc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx target = operands[0];
  rtx op0 = operands[2];
  rtx op1 = operands[3];
  rtx result = target;
  HOST_WIDE_INT val;

  if (!REG_P (op0) || REGNO (op0) != T_REG
      || !CONST_INT_P (op1))
    return false;
  if (!REG_P (result))
    result = gen_reg_rtx (SImode);
  val = INTVAL (op1);
  if ((code == EQ && val == 1) || (code == NE && val == 0))
    emit_insn (gen_movt (result, get_t_reg_rtx ()));
  else if ((code == EQ && val == 0) || (code == NE && val == 1))
    emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
  else if (code == EQ || code == NE)
    emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
  else
    return false;
  if (result != target)
    emit_move_insn (target, result);
  return true;
}

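/* Mapping sketch (not from the original sources): for constant
   comparisons against the T bit handled above,

       reg = (T == 1)  or  (T != 0)   ->  movt    reg
       reg = (T == 0)  or  (T != 1)   ->  movnegt reg

   and any other EQ/NE against a constant folds to the compile-time
   result GEN_INT (code == NE), since T can only hold 0 or 1.  */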
/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  rtx pattern, part = NULL_RTX;
  int len, i;

  pattern = PATTERN (insn);
  len = XVECLEN (pattern, 0);
  for (i = 0; i < len; i++)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
	  && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
	return XEXP (part, 0);
    }
  gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
  return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
}

/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.
   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
bool
check_use_sfunc_addr (rtx insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (LABEL_P (insn) || JUMP_P (insn))
	return false;
      if (! INSN_P (insn))
	continue;

      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
	  || get_attr_type (insn) != TYPE_SFUNC)
	continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  gcc_unreachable ();
}

/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi.  */
static GTY(()) rtx sh_fsca_sf2int_rtx;

rtx
sh_fsca_sf2int (void)
{
  if (! sh_fsca_sf2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_sf2int_rtx;
}

/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
   maps to 0x10000.  */
static GTY(()) rtx sh_fsca_int2sf_rtx;

rtx
sh_fsca_int2sf (void)
{
  if (! sh_fsca_int2sf_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "9.587379924285257e-5");
      sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_int2sf_rtx;
}

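/* Numeric check of the two constants above (not from the original
   sources): 2**15 / pi = 32768 / 3.14159265... = 10430.37835047...,
   and pi / 2**15 = 9.58737992428526e-5.  The round trip is exact at the
   full circle: 2*pi * 10430.378350... = 65536.0 = 0x10000, and
   0x10000 * 9.587379924e-5 = 6.28318... = 2*pi.  */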
/* Initialize the CUMULATIVE_ARGS structure.  */
void
sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
			 tree fntype,
			 rtx libname ATTRIBUTE_UNUSED,
			 tree fndecl,
			 signed int n_named_args,
			 enum machine_mode mode)
{
  pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
  pcum->free_single_fp_reg = 0;
  pcum->stack_regs = 0;
  pcum->byref_regs = 0;
  pcum->byref = 0;
  pcum->outgoing = (n_named_args == -1) ? 0 : 1;

  /* XXX - Should we check TARGET_HITACHI here ???  */
  pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;

  if (fntype)
    {
      pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
			 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
      pcum->prototype_p = prototype_p (fntype);
      pcum->arg_count [(int) SH_ARG_INT]
	= TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);

      pcum->call_cookie
	= CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				 && pcum->arg_count [(int) SH_ARG_INT] == 0
				 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
				     ? int_size_in_bytes (TREE_TYPE (fntype))
				     : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
				 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
				     == FIRST_RET_REG));
    }
  else
    {
      pcum->arg_count [(int) SH_ARG_INT] = 0;
      pcum->prototype_p = FALSE;
      if (mode != VOIDmode)
	{
	  pcum->call_cookie =
	    CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				   && GET_MODE_SIZE (mode) > 4
				   && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);

	  /* If the default ABI is the Renesas ABI then all library
	     calls must assume that the library will be using the
	     Renesas ABI.  So if the function would return its result
	     in memory then we must force the address of this memory
	     block onto the stack.  Ideally we would like to call
	     targetm.calls.return_in_memory() here but we do not have
	     the TYPE or the FNDECL available so we synthesize the
	     contents of that function as best we can.  */
	  pcum->force_mem =
	    (TARGET_DEFAULT & MASK_HITACHI)
	    && (mode == BLKmode
		|| (GET_MODE_SIZE (mode) > 4
		    && !(mode == DFmode
			 && TARGET_FPU_DOUBLE)));
	}
      else
	{
	  pcum->call_cookie = 0;
	  pcum->force_mem = FALSE;
	}
    }
}

12692 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12693 not enter into CONST_DOUBLE for the replace.
12695 Note that copying is not done so X must not be shared unless all copies
12696 are to be modified.
12698 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12699 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
12700 replacements[n*2+1] - and that we take mode changes into account.
12702 If a replacement is ambiguous, return NULL_RTX.
12704 If MODIFY is zero, don't modify any rtl in place,
12705 just return zero or nonzero for failure / success. */
12707 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)

  /* The following prevents loops when we replace a MEM inside a
     CONST_DOUBLE with the same CONST_DOUBLE.  */
  if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
    return x;

  for (i = n_replacements - 1; i >= 0; i--)
    if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
      return replacements[i*2+1];

  /* Allow this function to make replacements in EXPR_LISTs.  */
  if (x == NULL_RTX)
    return NULL_RTX;

  if (GET_CODE (x) == SUBREG)
    {
      rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
					n_replacements, modify);

      if (CONST_INT_P (new_rtx))
	{
	  x = simplify_subreg (GET_MODE (x), new_rtx,
			       GET_MODE (SUBREG_REG (x)),
			       SUBREG_BYTE (x));
	  if (! x)
	    abort ();
	}
      else if (modify)
	SUBREG_REG (x) = new_rtx;

      return x;
    }
  else if (REG_P (x))
    {
      unsigned regno = REGNO (x);
      unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
      rtx result = NULL_RTX;

      for (i = n_replacements - 1; i >= 0; i--)
	{
	  rtx from = replacements[i*2];
	  rtx to = replacements[i*2+1];
	  unsigned from_regno, from_nregs, to_regno, new_regno;

	  if (!REG_P (from))
	    continue;
	  from_regno = REGNO (from);
	  from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
	  if (regno < from_regno + from_nregs && regno + nregs > from_regno)
	    {
	      /* The replacement is ambiguous if it covers X only
		 partially, or if more than one replacement matches.  */
	      if (regno < from_regno
		  || regno + nregs > from_regno + from_nregs
		  || !REG_P (to)
		  || result)
		return NULL_RTX;
	      to_regno = REGNO (to);
	      if (to_regno < FIRST_PSEUDO_REGISTER)
		{
		  new_regno = regno + to_regno - from_regno;
		  if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
		      != nregs)
		    return NULL_RTX;
		  result = gen_rtx_REG (GET_MODE (x), new_regno);
		}
	      else if (GET_MODE (x) <= GET_MODE (to))
		result = gen_lowpart_common (GET_MODE (x), to);
	      else
		result = gen_lowpart_SUBREG (GET_MODE (x), to);
	    }
	}
      return result ? result : x;
    }
  else if (GET_CODE (x) == ZERO_EXTEND)
    {
      rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
					n_replacements, modify);

      if (CONST_INT_P (new_rtx))
	{
	  x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
					new_rtx, GET_MODE (XEXP (x, 0)));
	  if (! x)
	    abort ();
	}
      else if (modify)
	XEXP (x, 0) = new_rtx;

      return x;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      rtx new_rtx;

      if (fmt[i] == 'e')
	{
	  new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
					n_replacements, modify);
	  if (!new_rtx)
	    return NULL_RTX;
	  if (modify)
	    XEXP (x, i) = new_rtx;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  {
	    new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
					  n_replacements, modify);
	    if (!new_rtx)
	      return NULL_RTX;
	    if (modify)
	      XVECEXP (x, i, j) = new_rtx;
	  }
    }

  return x;
}
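
/* Usage sketch for replace_n_hard_rtx above (illustrative only; the
   register choices are hypothetical):

     rtx repl[4] = { gen_rtx_REG (SImode, 4), gen_rtx_REG (SImode, 5),
		     gen_rtx_REG (SImode, 6), gen_rtx_REG (SImode, 7) };
     if (replace_n_hard_rtx (pat, repl, 2, 0))
       replace_n_hard_rtx (pat, repl, 2, 1);

   The dry run (MODIFY == 0) checks that no replacement is ambiguous
   before the second call modifies PAT in place.  */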

/* Generate a TRUNCATE of X to MODE, folding through an existing sign/zero
   extension where possible.  When NEED_SIGN_EXT is true, only a SIGN_EXTEND
   may replace the TRUNCATE.  */
rtx
sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      enum machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
	return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
	x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
	       && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  code = GET_CODE (x);
	  x = inner;
	}
    }

  return gen_rtx_fmt_e (code, mode, x);
}
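
/* Illustrative examples (not from the original sources): with
   x = (sign_extend:DI (reg:SI r)), sh_gen_truncate (SImode, x, 1) returns
   the inner reg:SI directly; with x = (sign_extend:DI (reg:HI r)) and
   mode == SImode, the extension is narrower than MODE, so the result is
   (sign_extend:SI (reg:HI r)) instead of a TRUNCATE.  */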

/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
int
shmedia_cleanup_truncate (rtx *p, void *n_changes)
{
  rtx x = *p, reg;

  if (GET_CODE (x) != TRUNCATE)
    return 0;
  reg = XEXP (x, 0);
  if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
    {
      enum machine_mode reg_mode = GET_MODE (reg);
      XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
				     subreg_lowpart_offset (DImode, reg_mode));
      *(int*) n_changes += 1;
      return -1;
    }
  return 0;
}

/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.

   Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (MEM_P (*loc));
}

/* Return true iff INSN contains a MEM.  */
bool
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}

/* Return true iff INSN loads a banked register.  */
bool
sh_loads_bankedreg_p (rtx insn)
{
  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx op = SET_DEST (PATTERN (insn));
      if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
	return true;
    }

  return false;
}

/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
	{
	  rtx reg = gen_reg_rtx (Pmode);

	  /* We must not use GOTPLT for sibcalls, because PIC_REG
	     must be restored before the PLT code gets to run.  */
	  if (is_sibcall)
	    emit_insn (gen_symGOT2reg (reg, fnaddr));
	  else
	    emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
	  fnaddr = reg;
	}
      else
	{
	  fnaddr = gen_sym2PIC (fnaddr);
	  PUT_MODE (fnaddr, Pmode);
	}
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);

  return fnaddr;
}
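
/* Illustrative flow (hypothetical): for a non-local SYMBOL_REF under
   -fpic in a normal (non-sibling) call, the address is first loaded with
   gen_symGOTPLT2reg into a fresh pseudo; afterwards it is either fed
   through a ptabs insn into a PDImode branch-target register (when ptabs
   could trap on it) or simply copied into a Pmode register.  */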

/* Implement TARGET_PREFERRED_RELOAD_CLASS.  */
static reg_class_t
sh_preferred_reload_class (rtx x, reg_class_t rclass)
{
  if (rclass == NO_REGS
      && TARGET_SHMEDIA
      && (CONST_DOUBLE_P (x)
	  || GET_CODE (x) == SYMBOL_REF
	  || PIC_ADDR_P (x)))
    return GENERAL_REGS;

  return rclass;
}

/* Implement TARGET_SECONDARY_RELOAD.  */
static reg_class_t
sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
    return rclass == R0_REGS ? NO_REGS : R0_REGS;

  if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
    return rclass == R0_REGS ? NO_REGS : R0_REGS;

  if (REG_P (x) && REGNO (x) == GBR_REG)
    return NO_REGS;

  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (rclass)
	  && ! TARGET_SHMEDIA
	  && immediate_operand (x, mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x))
		&& mode == SFmode && fldi_ok ()))
	switch (mode)
	  {
	  case SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    return NO_REGS;
	  case DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    return NO_REGS;
	  case SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      if (rclass == FPUL_REGS
	  && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
			     || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      if (rclass == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
	    return GENERAL_REGS;
	  else if (mode == SFmode)
	    return FP_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      if (rclass == FPSCR_REGS
	  && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (rclass)
	  && TARGET_SHMEDIA
	  && immediate_operand (x, mode)
	  && x != CONST0_RTX (GET_MODE (x))
	  && GET_MODE (x) != V4SFmode)
	return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
	  && TARGET_SHMEDIA && inqhi_operand (x, mode))
	{
	  sri->icode = ((mode == QImode)
			? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
	  return NO_REGS;
	}
      if (TARGET_SHMEDIA && rclass == GENERAL_REGS
	  && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
	return TARGET_REGS;
    } /* end of input-only processing.  */

  if (((REGCLASS_HAS_FP_REG (rclass)
	&& (REG_P (x)
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (rclass)
	   && REG_P (x)
	   && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((rclass == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (rclass)
	   && ! TARGET_SHMEDIA && mode == SImode))
      && (MEM_P (x)
	  || (REG_P (x)
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (rclass == FPUL_REGS)
	return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((rclass == TARGET_REGS
       || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
      && !satisfies_constraint_Csy (x)
      && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((rclass == MAC_REGS || rclass == PR_REGS)
      && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
      && rclass != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (rclass != GENERAL_REGS && REG_P (x)
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;

  /* If here, fall back to loading the FPUL register through general
     registers.  This case can happen when the movsi_ie insn is picked
     initially to load/store the FPUL register from/to another register,
     and then the other register is allocated on the stack.  */
  if (rclass == FPUL_REGS && true_regnum (x) == -1)
    return GENERAL_REGS;

  /* Force mov.b / mov.w displacement addressing insn to use R0 as
     the other operand.
     On SH2A we could also just leave it alone here, which would result in a
     4 byte move insn being generated instead.  However, for this to work
     the insns must have the appropriate alternatives.  */
  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
      && satisfies_constraint_Sdd (x)
      && disp_addr_displacement (x) <= max_mov_insn_displacement (mode, false))
    return R0_REGS;
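
  /* For example (illustrative): on non-SH2A a QImode load such as
     'mov.b @(4,r5),Rn' exists only with Rn == r0, which is why anything
     other than R0_REGS gets R0_REGS as the reload class here.  */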

  /* When reload is trying to address a QImode or HImode subreg on the stack,
     force any subreg byte into R0_REGS, as this is going to become a
     displacement address.
     We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
     is on the stack, the memref to it might already require a displacement
     and that has to be added to the final address.  At this point we don't
     know the cumulative displacement so we assume the worst case.  */
  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
      && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
    return R0_REGS;

  return NO_REGS;
}

static void
sh_conditional_register_usage (void)
{
  unsigned regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      fixed_regs[regno] = call_used_regs[regno] = 1;
  /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs.  */
  if (TARGET_SH5)
    {
      call_used_regs[FIRST_GENERAL_REG + 8]
	= call_used_regs[FIRST_GENERAL_REG + 9] = 1;
      call_really_used_regs[FIRST_GENERAL_REG + 8]
	= call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
    }
  if (TARGET_SHMEDIA)
    {
      regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
      CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
      regno_reg_class[FIRST_FP_REG] = FP_REGS;
    }
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* Renesas saves and restores mac registers on call.  */
  if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
    {
      call_really_used_regs[MACH_REG] = 0;
      call_really_used_regs[MACL_REG] = 0;
    }

  if (TARGET_SHMEDIA)
    {
      for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno++)
	if (! fixed_regs[regno] && call_really_used_regs[regno])
	  SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
    }
  else
    for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
      if (! fixed_regs[regno] && call_really_used_regs[regno])
	SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   can_store_by_pieces constructs VOIDmode CONST_DOUBLEs.  */
static bool
sh_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  return (TARGET_SHMEDIA
	  ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
	     || x == CONST0_RTX (mode)
	     || !TARGET_SHMEDIA_FPU
	     || TARGET_SHMEDIA64)
	  : (GET_CODE (x) != CONST_DOUBLE
	     || mode == DFmode || mode == SFmode
	     || mode == DImode || GET_MODE (x) == VOIDmode));
}

enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;

static void
sh_init_sync_libfuncs (void)
{
  init_sync_libfuncs (UNITS_PER_WORD);
}

/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  */
bool
sh_can_use_simple_return_p (void)
{
  HARD_REG_SET live_regs_mask;
  int d;

  /* Some targets require special return insns.  */
  if (TARGET_SHMEDIA
      || (TARGET_SHCOMPACT
	  && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
    return false;

  if (! reload_completed || frame_pointer_needed)
    return false;

  /* Moving the prologue around doesn't reduce the size.  */
  if (optimize_function_for_size_p (cfun))
    return false;

  /* Finally, allow for the PR save.  */
  d = calc_live_regs (&live_regs_mask);

  if (rounded_frame_size (d) > 4)
    return false;

  return true;
}

/*------------------------------------------------------------------------------
  Address mode optimization support code
*/

typedef HOST_WIDE_INT disp_t;
static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
static const disp_t INVALID_DISP = MAX_DISP;

/* A memory reference which is described by a base register and a
   displacement.  */
class base_reg_disp
{
public:
  base_reg_disp (rtx br, disp_t d);

  bool is_reg (void) const;
  bool is_disp (void) const;
  rtx reg (void) const;
  disp_t disp (void) const;

private:
  rtx reg_;
  disp_t disp_;
};

inline
base_reg_disp::base_reg_disp (rtx br, disp_t d)
: reg_ (br), disp_ (d)
{
}

inline bool
base_reg_disp::is_reg (void) const
{
  return reg_ != NULL_RTX && disp_ != INVALID_DISP;
}

inline bool
base_reg_disp::is_disp (void) const
{
  return reg_ == NULL_RTX && disp_ != INVALID_DISP;
}

inline rtx
base_reg_disp::reg (void) const
{
  return reg_;
}

inline disp_t
base_reg_disp::disp (void) const
{
  return disp_;
}
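
/* Summary of the three states (illustrative): reg_ != NULL with a valid
   disp_ means 'base register + displacement'; reg_ == NULL with a valid
   disp_ means a pure constant displacement; disp_ == INVALID_DISP marks
   a value that could not be analyzed.  */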

/* Find the base register and calculate the displacement for a given
   address rtx 'x'.
   This is done by walking the insn list backwards and following SET insns
   that set the value of the specified reg 'x'.  */
static base_reg_disp
sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
{
  if (REG_P (x))
    {
      if (REGNO (x) == GBR_REG)
	return base_reg_disp (x, disp);

      /* We've reached a hard-reg.  This is probably the point where
	 function args are copied to pseudos.  Do not go any further and
	 stick to the pseudo.  If the original mem addr was in a hard reg
	 from the beginning, it will become the base reg.  */
      if (REGNO (x) < FIRST_PSEUDO_REGISTER)
	return base_reg_disp (base_reg != NULL ? base_reg : x, disp);

      /* Try to find the previous insn that sets the reg.  */
      for (rtx i = prev_nonnote_insn (insn); i != NULL;
	   i = prev_nonnote_insn (i))
	{
	  if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
	      && CALL_P (i))
	    break;

	  if (!NONJUMP_INSN_P (i))
	    continue;

	  rtx p = PATTERN (i);
	  if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
	      && REGNO (XEXP (p, 0)) == REGNO (x))
	    {
	      /* If the recursion can't find out any more details about the
		 source of the set, then this reg becomes our new base reg.  */
	      return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
	    }
	}

      /* When here, no previous insn was found that sets the reg.
	 The input reg is already the base reg.  */
      return base_reg_disp (x, disp);
    }

  else if (GET_CODE (x) == PLUS)
    {
      base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
      base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));

      /* Either the left or the right value must be a reg.
	 We don't handle the case of 'reg + reg' here.  */
      if (left_val.is_reg () && right_val.is_disp ())
	return base_reg_disp (left_val.reg (), left_val.disp ()
					       + right_val.disp () + disp);
      else if (right_val.is_reg () && left_val.is_disp ())
	return base_reg_disp (right_val.reg (), right_val.disp ()
						+ left_val.disp () + disp);
      else
	return base_reg_disp (base_reg, disp);
    }

  else if (CONST_INT_P (x))
    return base_reg_disp (NULL, disp + INTVAL (x));

  /* Didn't find anything useful.  */
  return base_reg_disp (base_reg, disp);
}
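
/* Worked example for sh_find_base_reg_disp (illustrative; register numbers
   are hypothetical): given

     (set (reg:SI 163) (plus:SI (reg:SI GBR) (const_int 4)))
     (set (reg:SI 164) (plus:SI (reg:SI 163) (const_int 8)))
     ... (mem:SI (reg:SI 164)) ...

   the walk follows reg 164 back to reg 163 and then to GBR, accumulating
   the constants, and returns base reg GBR with displacement 12.  */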

/* Given an insn and a memory operand, try to find an equivalent GBR
   based memory address and return the corresponding new memory address.
   Return NULL_RTX if not found.  */
rtx
sh_find_equiv_gbr_addr (rtx insn, rtx mem)
{
  if (!MEM_P (mem))
    return NULL_RTX;

  /* Leave post/pre inc/dec or any other side effect addresses alone.  */
  if (side_effects_p (XEXP (mem, 0)))
    return NULL_RTX;

  base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));

  if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
    {
      rtx disp = GEN_INT (gbr_disp.disp ());
      if (gbr_displacement (disp, GET_MODE (mem)))
	return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
    }

  return NULL_RTX;
}

/*------------------------------------------------------------------------------
  Manual insn combine support code.
*/

/* Given a reg rtx and a start insn, try to find the insn that sets the
   specified reg by using the specified insn stepping function, such as
   'prev_nonnote_insn_bb'.  When the insn is found, try to extract the rtx
   of the reg set.  */
set_of_reg
sh_find_set_of_reg (rtx reg, rtx insn, rtx (*stepfunc) (rtx))
{
  set_of_reg result;
  result.insn = insn;
  result.set_rtx = NULL_RTX;
  result.set_src = NULL_RTX;

  if (!REG_P (reg) || insn == NULL_RTX)
    return result;

  for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
       result.insn = stepfunc (result.insn))
    {
      if (BARRIER_P (result.insn))
	return result;
      if (!NONJUMP_INSN_P (result.insn))
	continue;
      if (reg_set_p (reg, result.insn))
	{
	  result.set_rtx = set_of (reg, result.insn);

	  if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
	    return result;

	  result.set_src = XEXP (result.set_rtx, 1);
	  return result;
	}
    }

  return result;
}
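
/* Usage sketch (illustrative): to see what set the operand OP before INSN
   within the same basic block:

     set_of_reg s = sh_find_set_of_reg (op, insn, prev_nonnote_insn_bb);
     if (s.set_src != NULL_RTX)
       ... inspect s.set_src / s.insn ...

   set_src is only valid when the found pattern really is a single SET.  */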

/* Given an op rtx and an insn, try to find out whether the result of the
   specified op consists only of logical operations on T bit stores.  */
bool
sh_is_logical_t_store_expr (rtx op, rtx insn)
{
  if (!logical_operator (op, SImode))
    return false;

  rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
  int op_is_t_count = 0;

  for (int i = 0; i < 2; ++i)
    {
      if (t_reg_operand (ops[i], VOIDmode)
	  || negt_reg_operand (ops[i], VOIDmode))
	op_is_t_count++;
      else
	{
	  set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
						  prev_nonnote_insn_bb);
	  if (op_set.set_src == NULL_RTX)
	    continue;

	  if (t_reg_operand (op_set.set_src, VOIDmode)
	      || negt_reg_operand (op_set.set_src, VOIDmode)
	      || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
	    op_is_t_count++;
	}
    }

  return op_is_t_count == 2;
}
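
/* Illustrative example (hypothetical insns): if r1 and r2 were both set
   from the T bit, e.g.

     (set (reg:SI r1) (reg:SI T_REG))
     (set (reg:SI r2) (xor:SI (reg:SI T_REG) (const_int 1)))
     (set (reg:SI r3) (and:SI (reg:SI r1) (reg:SI r2)))

   then sh_is_logical_t_store_expr returns true for the AND expression,
   since both operands originate from T bit stores.  */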

/* Given the operand that is extended in a sign/zero extend insn, and the
   insn, try to figure out whether the sign/zero extension can be replaced
   by a simple reg-reg copy.  If so, the replacement reg rtx is returned,
   NULL_RTX otherwise.  */
rtx
sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
{
  if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
    extended_op = SUBREG_REG (extended_op);
  else if (!REG_P (extended_op))
    return NULL_RTX;

  /* Reg moves must be of the same mode.  */
  if (GET_MODE (extended_op) != SImode)
    return NULL_RTX;

  set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
  if (s.set_src == NULL_RTX)
    return NULL_RTX;

  if (t_reg_operand (s.set_src, VOIDmode)
      || negt_reg_operand (s.set_src, VOIDmode))
    return extended_op;

  /* If the zero extended reg was formed by a logical operation, check the
     operands of the logical operation.  If both originated from T bit
     stores the zero extension can be eliminated.  */
  else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
    return extended_op;