1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2014 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
28 #include "coretypes.h"
30 #include "insn-config.h"
33 #include "stringpool.h"
34 #include "stor-layout.h"
43 #include "hard-reg-set.h"
45 #include "insn-attr.h"
46 #include "diagnostic-core.h"
51 #include "target-def.h"
52 #include "langhooks.h"
53 #include "basic-block.h"
56 #include "sched-int.h"
59 #include "hash-table.h"
60 #include "tree-ssa-alias.h"
61 #include "internal-fn.h"
62 #include "gimple-fold.h"
64 #include "gimple-expr.h"
69 #include "alloc-pool.h"
70 #include "tm-constrs.h"
72 #include "tree-pass.h"
73 #include "pass_manager.h"
77 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
79 /* These are some macros to abstract register modes. */
/* True when VALUE, converted to HOST_WIDE_INT, lies within [-512, 511].
   VALUE is expanded twice, so it must be free of side effects.  */
#define CONST_OK_FOR_I10(VALUE) \
  (-512 <= (HOST_WIDE_INT) (VALUE) && (HOST_WIDE_INT) (VALUE) <= 511)
83 #define CONST_OK_FOR_ADD(size) \
84 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
85 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
86 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
87 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* Used to simplify the logic below.  Yield the attribute list that applies
   to DECL: the type's own attributes when DECL is a type; otherwise the
   declaration's attributes if that list is non-null, falling back to the
   attributes of the declaration's type.

   The whole expansion is parenthesized so the macro can be combined with
   other operators (comparison, negation, ...) without the conditional
   expression capturing them.  DECL may be evaluated more than once, so it
   must not have side effects.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl) ? TYPE_ATTRIBUTES (decl) \
   : DECL_ATTRIBUTES (decl) ? DECL_ATTRIBUTES (decl) \
   : TYPE_ATTRIBUTES (TREE_TYPE (decl)))
97 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
98 int current_function_interrupt;
100 tree sh_deferred_function_attributes;
101 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* Number of r0 life regions. */
117 static int r0_life_regions;
119 /* If true, skip cycles for Q -> R movement. */
120 static int skip_cycles = 0;
122 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
123 and returned from sh_reorder2. */
124 static short cached_can_issue_more;
126 /* Unique number for UNSPEC_BBR pattern. */
127 static unsigned int unspec_bbr_uid = 1;
129 /* Provides the class number of the smallest class containing each hard register number. */
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
171 GENERAL_REGS, GENERAL_REGS,
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
181 int assembler_dialect;
183 static bool shmedia_space_reserved_for_target_registers;
185 static void split_branches (rtx_insn *);
186 static int branch_dest (rtx);
187 static void print_slot (rtx);
188 static rtx_code_label *add_constant (rtx, enum machine_mode, rtx);
189 static void dump_table (rtx_insn *, rtx_insn *);
190 static bool broken_move (rtx_insn *);
191 static bool mova_p (rtx_insn *);
192 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
193 static bool noncall_uses_reg (rtx, rtx, rtx *);
194 static rtx gen_block_redirect (rtx, int, int);
195 static void sh_reorg (void);
196 static void sh_option_override (void);
197 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
198 static rtx_insn *frame_insn (rtx);
199 static rtx push (int);
200 static void pop (int);
201 static void push_regs (HARD_REG_SET *, int);
202 static int calc_live_regs (HARD_REG_SET *);
203 static HOST_WIDE_INT rounded_frame_size (int);
204 static bool sh_frame_pointer_required (void);
205 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
206 static int sh_mode_needed (int, rtx);
207 static int sh_mode_after (int, int, rtx);
208 static int sh_mode_entry (int);
209 static int sh_mode_exit (int);
210 static int sh_mode_priority (int entity, int n);
212 static rtx mark_constant_pool_use (rtx);
213 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
215 static tree sh_handle_resbank_handler_attribute (tree *, tree,
217 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
219 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
220 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
221 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
222 static void sh_print_operand (FILE *, rtx, int);
223 static void sh_print_operand_address (FILE *, rtx);
224 static bool sh_print_operand_punct_valid_p (unsigned char code);
225 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
226 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
227 static void sh_insert_attributes (tree, tree *);
228 static const char *sh_check_pch_target_flags (int);
229 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
230 static int sh_adjust_cost (rtx, rtx, rtx, int);
231 static int sh_issue_rate (void);
232 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
233 static short find_set_regmode_weight (rtx, enum machine_mode);
234 static short find_insn_regmode_weight (rtx, enum machine_mode);
235 static void find_regmode_weight (basic_block, enum machine_mode);
236 static int find_r0_life_regions (basic_block);
237 static void sh_md_init_global (FILE *, int, int);
238 static void sh_md_finish_global (FILE *, int);
239 static int rank_for_reorder (const void *, const void *);
240 static void swap_reorder (rtx_insn **, int);
241 static void ready_reorder (rtx_insn **, int);
242 static bool high_pressure (enum machine_mode);
243 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
244 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
245 static void sh_md_init (FILE *, int, int);
246 static int sh_variable_issue (FILE *, int, rtx, int);
248 static bool sh_function_ok_for_sibcall (tree, tree);
250 static bool sh_cannot_modify_jumps_p (void);
251 static reg_class_t sh_target_reg_class (void);
252 static bool sh_optimize_target_register_callee_saved (bool);
253 static bool sh_ms_bitfield_layout_p (const_tree);
255 static void sh_init_builtins (void);
256 static tree sh_builtin_decl (unsigned, bool);
257 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
258 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
259 HOST_WIDE_INT, tree);
260 static void sh_file_start (void);
261 static bool flow_dependent_p (rtx, rtx);
262 static void flow_dependent_p_1 (rtx, const_rtx, void *);
263 static int shiftcosts (rtx);
264 static int and_xor_ior_costs (rtx, int);
265 static int addsubcosts (rtx);
266 static int multcosts (rtx);
267 static bool unspec_caller_rtx_p (rtx);
268 static bool sh_cannot_copy_insn_p (rtx);
269 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
270 static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
271 static int sh_pr_n_sets (void);
272 static rtx sh_allocate_initial_value (rtx);
273 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
274 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
276 struct secondary_reload_info *);
277 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
278 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
279 static rtx sh_delegitimize_address (rtx);
280 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
281 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
282 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
283 static int scavenge_reg (HARD_REG_SET *s);
284 struct save_schedule_s;
285 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
286 struct save_schedule_s *, int);
288 static rtx sh_struct_value_rtx (tree, int);
289 static rtx sh_function_value (const_tree, const_tree, bool);
290 static bool sh_function_value_regno_p (const unsigned int);
291 static rtx sh_libcall_value (enum machine_mode, const_rtx);
292 static bool sh_return_in_memory (const_tree, const_tree);
293 static rtx sh_builtin_saveregs (void);
294 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
296 static bool sh_strict_argument_naming (cumulative_args_t);
297 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
298 static tree sh_build_builtin_va_list (void);
299 static void sh_va_start (tree, rtx);
300 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
301 static bool sh_promote_prototypes (const_tree);
302 static enum machine_mode sh_promote_function_mode (const_tree type,
307 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
309 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
311 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
313 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
315 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
317 static bool sh_scalar_mode_supported_p (enum machine_mode);
318 static int sh_dwarf_calling_convention (const_tree);
319 static void sh_encode_section_info (tree, rtx, int);
320 static bool sh2a_function_vector_p (tree);
321 static void sh_trampoline_init (rtx, tree, rtx);
322 static rtx sh_trampoline_adjust_address (rtx);
323 static void sh_conditional_register_usage (void);
324 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
325 static int mov_insn_size (enum machine_mode, bool);
326 static int mov_insn_alignment_mask (enum machine_mode, bool);
327 static bool sequence_insn_p (rtx);
328 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
329 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
330 enum machine_mode, bool);
331 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
333 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
335 static const struct attribute_spec sh_attribute_table[] =
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
338 affects_type_identity } */
339 { "interrupt_handler", 0, 0, true, false, false,
340 sh_handle_interrupt_handler_attribute, false },
341 { "sp_switch", 1, 1, true, false, false,
342 sh_handle_sp_switch_attribute, false },
343 { "trap_exit", 1, 1, true, false, false,
344 sh_handle_trap_exit_attribute, false },
345 { "renesas", 0, 0, false, true, false,
346 sh_handle_renesas_attribute, false },
347 { "trapa_handler", 0, 0, true, false, false,
348 sh_handle_interrupt_handler_attribute, false },
349 { "nosave_low_regs", 0, 0, true, false, false,
350 sh_handle_interrupt_handler_attribute, false },
351 { "resbank", 0, 0, true, false, false,
352 sh_handle_resbank_handler_attribute, false },
353 { "function_vector", 1, 1, true, false, false,
354 sh2a_handle_function_vector_handler_attribute, false },
355 { NULL, 0, 0, false, false, false, NULL, false }
358 /* Initialize the GCC target structure. */
359 #undef TARGET_ATTRIBUTE_TABLE
360 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
362 /* The next two are used for debug info when compiling with -gdwarf. */
363 #undef TARGET_ASM_UNALIGNED_HI_OP
364 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
365 #undef TARGET_ASM_UNALIGNED_SI_OP
366 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
368 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
369 #undef TARGET_ASM_UNALIGNED_DI_OP
370 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
371 #undef TARGET_ASM_ALIGNED_DI_OP
372 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
374 #undef TARGET_OPTION_OVERRIDE
375 #define TARGET_OPTION_OVERRIDE sh_option_override
377 #undef TARGET_PRINT_OPERAND
378 #define TARGET_PRINT_OPERAND sh_print_operand
379 #undef TARGET_PRINT_OPERAND_ADDRESS
380 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
381 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
382 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
383 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
384 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
386 #undef TARGET_ASM_FUNCTION_EPILOGUE
387 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
389 #undef TARGET_ASM_OUTPUT_MI_THUNK
390 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
392 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
393 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
394 hook_bool_const_tree_hwi_hwi_const_tree_true
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START sh_file_start
398 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
399 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
401 #undef TARGET_REGISTER_MOVE_COST
402 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
404 #undef TARGET_INSERT_ATTRIBUTES
405 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
407 #undef TARGET_SCHED_ADJUST_COST
408 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
410 #undef TARGET_SCHED_ISSUE_RATE
411 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
413 /* The next 5 hooks have been implemented for reenabling sched1. With the
414 help of these macros we are limiting the movement of insns in sched1 to
415 reduce the register pressure. The overall idea is to keep count of SImode
416 and SFmode regs required by already scheduled insns. When these counts
417 cross some threshold values; give priority to insns that free registers.
418 The insn that frees registers is most likely to be the insn with lowest
419 LUID (original insn order); but such an insn might be there in the stalled
420 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
421 up to a max of 8 cycles so that such insns may move from Q -> R.
423 The hooks are described below:
425 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
426 scheduler; it is called inside the sched_init function just after
427 find_insn_reg_weights function call. It is used to calculate the SImode
428 and SFmode weights of insns of basic blocks; very similar to what
429 find_insn_reg_weights does.
430 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
432 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
433 indicated by TARGET_SCHED_REORDER2; doing this may move insns from Q -> R.
436 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
437 high; reorder the ready queue so that the insn with lowest LUID will be issued next.
440 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
441 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
443 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
444 can be returned from TARGET_SCHED_REORDER2.
446 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
448 #undef TARGET_SCHED_DFA_NEW_CYCLE
449 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
451 #undef TARGET_SCHED_INIT_GLOBAL
452 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
454 #undef TARGET_SCHED_FINISH_GLOBAL
455 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
457 #undef TARGET_SCHED_VARIABLE_ISSUE
458 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
460 #undef TARGET_SCHED_REORDER
461 #define TARGET_SCHED_REORDER sh_reorder
463 #undef TARGET_SCHED_REORDER2
464 #define TARGET_SCHED_REORDER2 sh_reorder2
466 #undef TARGET_SCHED_INIT
467 #define TARGET_SCHED_INIT sh_md_init
469 #undef TARGET_DELEGITIMIZE_ADDRESS
470 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
472 #undef TARGET_LEGITIMIZE_ADDRESS
473 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
475 #undef TARGET_CANNOT_MODIFY_JUMPS_P
476 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
477 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
478 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
479 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
480 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
481 sh_optimize_target_register_callee_saved
483 #undef TARGET_MS_BITFIELD_LAYOUT_P
484 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
486 #undef TARGET_INIT_BUILTINS
487 #define TARGET_INIT_BUILTINS sh_init_builtins
488 #undef TARGET_BUILTIN_DECL
489 #define TARGET_BUILTIN_DECL sh_builtin_decl
490 #undef TARGET_EXPAND_BUILTIN
491 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
493 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
494 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
496 #undef TARGET_CANNOT_COPY_INSN_P
497 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
498 #undef TARGET_RTX_COSTS
499 #define TARGET_RTX_COSTS sh_rtx_costs
500 #undef TARGET_ADDRESS_COST
501 #define TARGET_ADDRESS_COST sh_address_cost
502 #undef TARGET_ALLOCATE_INITIAL_VALUE
503 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
505 #undef TARGET_MACHINE_DEPENDENT_REORG
506 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
508 #undef TARGET_DWARF_REGISTER_SPAN
509 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
512 #undef TARGET_HAVE_TLS
513 #define TARGET_HAVE_TLS true
516 #undef TARGET_PROMOTE_PROTOTYPES
517 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
518 #undef TARGET_PROMOTE_FUNCTION_MODE
519 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
521 #undef TARGET_FUNCTION_VALUE
522 #define TARGET_FUNCTION_VALUE sh_function_value
523 #undef TARGET_FUNCTION_VALUE_REGNO_P
524 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
525 #undef TARGET_LIBCALL_VALUE
526 #define TARGET_LIBCALL_VALUE sh_libcall_value
527 #undef TARGET_STRUCT_VALUE_RTX
528 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
532 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
533 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
534 #undef TARGET_SETUP_INCOMING_VARARGS
535 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
536 #undef TARGET_STRICT_ARGUMENT_NAMING
537 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
538 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
539 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
540 #undef TARGET_MUST_PASS_IN_STACK
541 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
542 #undef TARGET_PASS_BY_REFERENCE
543 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
544 #undef TARGET_CALLEE_COPIES
545 #define TARGET_CALLEE_COPIES sh_callee_copies
546 #undef TARGET_ARG_PARTIAL_BYTES
547 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
548 #undef TARGET_FUNCTION_ARG
549 #define TARGET_FUNCTION_ARG sh_function_arg
550 #undef TARGET_FUNCTION_ARG_ADVANCE
551 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
553 #undef TARGET_BUILD_BUILTIN_VA_LIST
554 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
555 #undef TARGET_EXPAND_BUILTIN_VA_START
556 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
557 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
558 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
560 #undef TARGET_SCALAR_MODE_SUPPORTED_P
561 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
562 #undef TARGET_VECTOR_MODE_SUPPORTED_P
563 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
565 #undef TARGET_CHECK_PCH_TARGET_FLAGS
566 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
568 #undef TARGET_DWARF_CALLING_CONVENTION
569 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
571 #undef TARGET_FRAME_POINTER_REQUIRED
572 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
574 #undef TARGET_MODE_EMIT
575 #define TARGET_MODE_EMIT sh_emit_mode_set
577 #undef TARGET_MODE_NEEDED
578 #define TARGET_MODE_NEEDED sh_mode_needed
580 #undef TARGET_MODE_AFTER
581 #define TARGET_MODE_AFTER sh_mode_after
583 #undef TARGET_MODE_ENTRY
584 #define TARGET_MODE_ENTRY sh_mode_entry
586 #undef TARGET_MODE_EXIT
587 #define TARGET_MODE_EXIT sh_mode_exit
589 #undef TARGET_MODE_PRIORITY
590 #define TARGET_MODE_PRIORITY sh_mode_priority
/* Register weight recorded for INSN in MODE.  Row 0 of regmode_weight holds
   the SImode weights and row 1 is used for every other mode; within a row
   the entry is selected by INSN_UID (INSN).  The result is an lvalue.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[(MODE) != SImode][INSN_UID (INSN)]
/* Running register pressure for MODE: slot 0 of curr_regmode_pressure is
   SImode, slot 1 serves every other mode.  The result is an lvalue.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[(MODE) != SImode]
600 #undef TARGET_ENCODE_SECTION_INFO
601 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
603 #undef TARGET_SECONDARY_RELOAD
604 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
606 #undef TARGET_PREFERRED_RELOAD_CLASS
607 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
609 #undef TARGET_CONDITIONAL_REGISTER_USAGE
610 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
612 #undef TARGET_LEGITIMATE_ADDRESS_P
613 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
615 #undef TARGET_TRAMPOLINE_INIT
616 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
617 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
618 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
620 #undef TARGET_LEGITIMATE_CONSTANT_P
621 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
623 #undef TARGET_CANONICALIZE_COMPARISON
624 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
626 #undef TARGET_FIXED_CONDITION_CODE_REGS
627 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
629 /* Machine-specific symbol_ref flags. */
630 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
632 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
633 is used by optabs.c atomic op expansion code as well as in sync.md. */
634 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
635 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
637 struct gcc_target targetm = TARGET_INITIALIZER;
640 /* Information on the currently selected atomic model.
641 This is initialized in sh_option_override. */
642 static sh_atomic_model selected_atomic_model_;
644 const sh_atomic_model&
645 selected_atomic_model (void)
647 return selected_atomic_model_;
650 static sh_atomic_model
651 parse_validate_atomic_model_option (const char* str)
653 const char* model_names[sh_atomic_model::num_models];
654 model_names[sh_atomic_model::none] = "none";
655 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
656 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
657 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
658 model_names[sh_atomic_model::soft_imask] = "soft-imask";
660 const char* model_cdef_names[sh_atomic_model::num_models];
661 model_cdef_names[sh_atomic_model::none] = "NONE";
662 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
663 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
664 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
665 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
668 ret.type = sh_atomic_model::none;
669 ret.name = model_names[sh_atomic_model::none];
670 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
672 ret.tcb_gbr_offset = -1;
674 /* Handle empty string as 'none'. */
675 if (str == NULL || *str == '\0')
678 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
680 std::vector<std::string> tokens;
681 for (std::stringstream ss (str); ss.good (); )
683 tokens.push_back (std::string ());
684 std::getline (ss, tokens.back (), ',');
688 err_ret ("invalid atomic model option");
690 /* The first token must be the atomic model name. */
692 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
693 if (tokens.front () == model_names[i])
695 ret.type = (sh_atomic_model::enum_type)i;
696 ret.name = model_names[i];
697 ret.cdef_name = model_cdef_names[i];
701 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
705 /* Go through the remaining tokens. */
706 for (size_t i = 1; i < tokens.size (); ++i)
708 if (tokens[i] == "strict")
710 else if (tokens[i].find ("gbr-offset=") == 0)
712 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
713 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
714 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
715 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
716 "option", offset_str.c_str ());
719 err_ret ("unknown parameter \"%s\" in atomic model option",
723 /* Check that the selection makes sense. */
724 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
725 err_ret ("atomic operations are not supported on SHmedia");
727 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
728 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
731 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
732 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
734 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
735 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
737 if (ret.type == sh_atomic_model::soft_tcb
738 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
739 || (ret.tcb_gbr_offset & 3) != 0))
740 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
741 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
744 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
745 err_ret ("cannot use atomic model %s in user mode", ret.name);
752 /* Register SH specific RTL passes. */
753 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
755 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
758 register_sh_passes (void)
763 /* Running the sh_treg_combine pass after ce1 generates better code when
764 comparisons are combined and reg-reg moves are introduced, because
765 reg-reg moves will be eliminated afterwards. However, there are quite
766 a few cases where combine will be unable to fold comparison-related insns,
767 thus for now don't do it.
768 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
769 PASS_POS_INSERT_AFTER, "ce1", 1);
772 /* Run sh_treg_combine pass after combine but before register allocation. */
773 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
774 PASS_POS_INSERT_AFTER, "split1", 1);
776 /* Run sh_treg_combine pass after register allocation and basic block
777 reordering as this sometimes creates new opportunities. */
778 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
779 PASS_POS_INSERT_AFTER, "split4", 1);
781 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
782 is known after a conditional branch.
783 This must be done after basic blocks and branch conditions have
784 stabilized and won't be changed by further passes. */
785 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
786 PASS_POS_INSERT_BEFORE, "sched2", 1);
789 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
790 various options, and do some machine dependent initialization. */
792 sh_option_override (void)
796 SUBTARGET_OVERRIDE_OPTIONS;
797 if (optimize > 1 && !optimize_size)
798 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
800 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
801 TARGET_CBRANCHDI4 = 1;
802 TARGET_CMPEQDI_T = 0;
804 sh_cpu = PROCESSOR_SH1;
805 assembler_dialect = 0;
807 sh_cpu = PROCESSOR_SH2;
809 sh_cpu = PROCESSOR_SH2E;
811 sh_cpu = PROCESSOR_SH2A;
813 sh_cpu = PROCESSOR_SH3;
815 sh_cpu = PROCESSOR_SH3E;
818 assembler_dialect = 1;
819 sh_cpu = PROCESSOR_SH4;
821 if (TARGET_SH4A_ARCH)
823 assembler_dialect = 1;
824 sh_cpu = PROCESSOR_SH4A;
828 sh_cpu = PROCESSOR_SH5;
829 target_flags |= MASK_ALIGN_DOUBLE;
830 if (TARGET_SHMEDIA_FPU)
831 target_flags |= MASK_FMOVD;
834 /* There are no delay slots on SHmedia. */
835 flag_delayed_branch = 0;
836 /* Relaxation isn't yet supported for SHmedia */
837 target_flags &= ~MASK_RELAX;
838 /* After reload, if conversion does little good but can cause ICEs:
840 - find_if_block doesn't do anything for SH because we don't
841 have conditional execution patterns. (We use conditional
842 move patterns, which are handled differently, and only
844 - find_cond_trap doesn't do anything for the SH because we
845 don't have conditional traps.
846 - find_if_case_1 uses redirect_edge_and_branch_force in
847 the only path that does an optimization, and this causes
848 an ICE when branch targets are in registers.
849 - find_if_case_2 doesn't do anything for the SHmedia after
850 reload except when it can redirect a tablejump - and
851 that's rather rare. */
852 flag_if_conversion2 = 0;
853 if (! strcmp (sh_div_str, "call"))
854 sh_div_strategy = SH_DIV_CALL;
855 else if (! strcmp (sh_div_str, "call2"))
856 sh_div_strategy = SH_DIV_CALL2;
857 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
858 sh_div_strategy = SH_DIV_FP;
859 else if (! strcmp (sh_div_str, "inv"))
860 sh_div_strategy = SH_DIV_INV;
861 else if (! strcmp (sh_div_str, "inv:minlat"))
862 sh_div_strategy = SH_DIV_INV_MINLAT;
863 else if (! strcmp (sh_div_str, "inv20u"))
864 sh_div_strategy = SH_DIV_INV20U;
865 else if (! strcmp (sh_div_str, "inv20l"))
866 sh_div_strategy = SH_DIV_INV20L;
867 else if (! strcmp (sh_div_str, "inv:call2"))
868 sh_div_strategy = SH_DIV_INV_CALL2;
869 else if (! strcmp (sh_div_str, "inv:call"))
870 sh_div_strategy = SH_DIV_INV_CALL;
871 else if (! strcmp (sh_div_str, "inv:fp"))
874 sh_div_strategy = SH_DIV_INV_FP;
876 sh_div_strategy = SH_DIV_INV;
878 TARGET_CBRANCHDI4 = 0;
879 /* Assembler CFI isn't yet fully supported for SHmedia. */
880 flag_dwarf2_cfi_asm = 0;
885 /* Only the sh64-elf assembler fully supports .quad properly. */
886 targetm.asm_out.aligned_op.di = NULL;
887 targetm.asm_out.unaligned_op.di = NULL;
890 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
891 Disable it for everything else. */
892 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
893 TARGET_USERMODE = false;
897 if (! strcmp (sh_div_str, "call-div1"))
898 sh_div_strategy = SH_DIV_CALL_DIV1;
899 else if (! strcmp (sh_div_str, "call-fp")
900 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
901 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
902 sh_div_strategy = SH_DIV_CALL_FP;
903 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
904 sh_div_strategy = SH_DIV_CALL_TABLE;
906 /* Pick one that makes most sense for the target in general.
907 It is not much good to use different functions depending
908 on -Os, since then we'll end up with two different functions
909 when some of the code is compiled for size, and some for
912 /* SH4 tends to emphasize speed. */
914 sh_div_strategy = SH_DIV_CALL_TABLE;
915 /* These have their own way of doing things. */
916 else if (TARGET_SH2A)
917 sh_div_strategy = SH_DIV_INTRINSIC;
918 /* ??? Should we use the integer SHmedia function instead? */
919 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
920 sh_div_strategy = SH_DIV_CALL_FP;
921 /* SH1 .. SH3 cores often go into small-footprint systems, so
922 default to the smallest implementation available. */
924 sh_div_strategy = SH_DIV_CALL_DIV1;
927 TARGET_PRETEND_CMOVE = 0;
928 if (sh_divsi3_libfunc[0])
929 ; /* User supplied - leave it alone. */
930 else if (TARGET_DIVIDE_CALL_FP)
931 sh_divsi3_libfunc = "__sdivsi3_i4";
932 else if (TARGET_DIVIDE_CALL_TABLE)
933 sh_divsi3_libfunc = "__sdivsi3_i4i";
935 sh_divsi3_libfunc = "__sdivsi3_1";
937 sh_divsi3_libfunc = "__sdivsi3";
939 if (sh_branch_cost == -1)
941 /* The SH1 does not have delay slots, hence we get a pipeline stall
942 at every branch. The SH4 is superscalar, so the single delay slot
943 is not sufficient to keep both pipelines filled.
944 In any case, set the default branch cost to '2', as it results in
945 slightly overall smaller code and also enables some if conversions
946 that are required for matching special T bit related insns. */
950 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
951 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
952 TARGET_ZDCBRANCH = 1;
954 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
955 if (! VALID_REGISTER_P (regno))
956 sh_register_names[regno][0] = '\0';
958 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
959 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
960 sh_additional_register_names[regno][0] = '\0';
962 if ((flag_pic && ! TARGET_PREFERGOT)
963 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
964 flag_no_function_cse = 1;
966 if (targetm.small_register_classes_for_mode_p (VOIDmode))
968 /* Never run scheduling before reload, since that can
969 break global alloc, and generates slower code anyway due
970 to the pressure on R0. */
971 /* Enable sched1 for SH4 if the user explicitly requests.
972 When sched1 is enabled, the ready queue will be reordered by
973 the target hooks if pressure is high. We can not do this for
974 PIC, SH3 and lower as they give spill failures for R0. */
975 if (!TARGET_HARD_SH4 || flag_pic)
976 flag_schedule_insns = 0;
977 /* ??? Current exception handling places basic block boundaries
978 after call_insns. It causes the high pressure on R0 and gives
979 spill failures for R0 in reload. See PR 22553 and the thread
981 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
982 else if (flag_exceptions)
984 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
985 warning (0, "ignoring -fschedule-insns because of exception "
987 flag_schedule_insns = 0;
989 else if (flag_schedule_insns
990 && !global_options_set.x_flag_schedule_insns)
991 flag_schedule_insns = 0;
994 /* Unwind info is not correct around the CFG unless either a frame
995 pointer is present or M_A_O_A is set. Fixing this requires rewriting
996 unwind info generation to be aware of the CFG and propagating states
998 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
999 || flag_exceptions || flag_non_call_exceptions)
1000 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1002 warning (0, "unwind tables currently require either a frame pointer "
1003 "or -maccumulate-outgoing-args for correctness");
1004 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1007 /* Unwinding with -freorder-blocks-and-partition does not work on this
1008 architecture, because it requires far jumps to label crossing between
1009 hot/cold sections which are rejected on this architecture. */
1010 if (flag_reorder_blocks_and_partition)
1012 if (flag_exceptions)
1014 inform (input_location,
1015 "-freorder-blocks-and-partition does not work with "
1016 "exceptions on this architecture");
1017 flag_reorder_blocks_and_partition = 0;
1018 flag_reorder_blocks = 1;
1020 else if (flag_unwind_tables)
1022 inform (input_location,
1023 "-freorder-blocks-and-partition does not support unwind "
1024 "info on this architecture");
1025 flag_reorder_blocks_and_partition = 0;
1026 flag_reorder_blocks = 1;
1030 /* Adjust loop, jump and function alignment values (in bytes), if those
1031 were not specified by the user using -falign-loops, -falign-jumps
1032 and -falign-functions options.
1033 32 bit alignment is better for speed, because instructions can be
1034 fetched as a pair from a longword boundary. For size use 16 bit
1035 alignment to get more compact code.
1036 Aligning all jumps increases the code size, even if it might
1037 result in slightly faster code. Thus, it is set to the smallest
1038 alignment possible if not specified by the user. */
1039 if (align_loops == 0)
1044 align_loops = optimize_size ? 2 : 4;
1047 if (align_jumps == 0)
1050 align_jumps = 1 << CACHE_LOG;
1054 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1055 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1057 if (align_functions == 0)
1060 align_functions = optimize_size
1061 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1063 align_functions = optimize_size ? 2 : 4;
1066 /* The linker relaxation code breaks when a function contains
1067 alignments that are larger than that at the start of a
1068 compilation unit. */
1071 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1073 /* Also take possible .long constants / mova tables into account. */
1076 if (align_functions < min_align)
1077 align_functions = min_align;
1080 if (flag_unsafe_math_optimizations)
1082 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1083 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1086 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1087 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1091 /* Allow fsrra insn only if -funsafe-math-optimizations and
1092 -ffinite-math-only is enabled. */
1093 TARGET_FSRRA = TARGET_FSRRA
1094 && flag_unsafe_math_optimizations
1095 && flag_finite_math_only;
1097 /* If the -mieee option was not explicitly set by the user, turn it on
1098 unless -ffinite-math-only was specified. See also PR 33135. */
1099 if (! global_options_set.x_TARGET_IEEE)
1100 TARGET_IEEE = ! flag_finite_math_only;
1102 if (sh_fixed_range_str)
1103 sh_fix_range (sh_fixed_range_str);
1105 /* This target defaults to strict volatile bitfields. */
1106 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1107 flag_strict_volatile_bitfields = 1;
1109 /* Parse atomic model option and make sure it is valid for the current
1111 selected_atomic_model_
1112 = parse_validate_atomic_model_option (sh_atomic_model_str);
1114 register_sh_passes ();
1117 /* Print the operand address in x to the stream. */
/* Dispatches on GET_CODE (x) and writes the address to STREAM.  The
   formats emitted below are "@reg", "@(disp,reg)", "@(r0,reg)", "@-reg"
   and "@reg+"; the final two statements pass X through
   mark_constant_pool_use and print it with output_addr_const.
   NOTE(review): the `case' labels selecting each format are not visible
   in this listing (the embedded line numbers skip); confirm the mapping
   against the full source.  */
1119 sh_print_operand_address (FILE *stream, rtx x)
1121 switch (GET_CODE (x))
1125 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
/* Two-operand address: operand 0 is named base, operand 1 index.  */
1130 rtx base = XEXP (x, 0);
1131 rtx index = XEXP (x, 1);
1133 switch (GET_CODE (index))
/* Displacement form: INTVAL (index) is printed as a decimal offset in
   front of the base register name.  */
1136 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1137 reg_names[true_regnum (base)]);
1143 int base_num = true_regnum (base);
1144 int index_num = true_regnum (index);
/* Only the larger of the two register numbers is printed by name; the
   other operand is written as the literal "r0" (presumably one of the
   pair is always r0 -- TODO confirm).  */
1146 fprintf (stream, "@(r0,%s)",
1147 reg_names[MAX (base_num, index_num)]);
1158 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1162 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1166 x = mark_constant_pool_use (x);
1167 output_addr_const (stream, x);
1172 /* Print operand x (an rtx) in assembler syntax to file stream
1173 according to modifier code.
1175 '.' print a .s if insn needs delay slot
1176 ',' print LOCAL_LABEL_PREFIX
1177 '@' print trap, rte or rts depending upon pragma interruptness
1178 '#' output a nop if there is nothing to put in the delay slot
1179 ''' print likelihood suffix (/u for unlikely).
1180 '>' print branch target if -fverbose-asm
1181 'O' print a constant without the #
1182 'R' print the LSW of a dp value - changes if in little endian
1183 'S' print the MSW of a dp value - changes if in little endian
1184 'T' print the next word of a dp value - same as 'R' in big endian mode.
1185 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1186 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1187 'N' print 'r63' if the operand is (const_int 0).
1188 'd' print a V2SF reg as dN instead of fpN.
1189 'm' print a pair `base,offset' or `base,index', for LD and ST.
1190 'U' Likewise for {LD,ST}{HI,LO}.
1191 'V' print the position of a single bit set.
1192 'W' print the position of a single bit cleared.
1193 't' print a memory address which is a register.
1194 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1195 'o' output an operator. */
/* STREAM is the FILE the text is written to, X the operand rtx and CODE
   the modifier character from the table above.
   NOTE(review): this listing omits the `case' labels, braces and `break'
   statements (the embedded line numbers skip), so which modifier selects
   each fragment below has to be confirmed against the full source.  */
1197 sh_print_operand (FILE *stream, rtx x, int code)
1200 enum machine_mode mode;
1208 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1209 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
/* Emits "/s" when ASSEMBLER_DIALECT is non-zero and ".s" otherwise.  */
1210 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1213 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
/* Looks for a "trap_exit" attribute on the current function; its
   argument becomes the operand of the trapa printed below.  Interrupt
   handlers get "rte" (preceded by "resbank" for resbank handlers);
   the remaining alternative prints "rts".  */
1216 trapa_attr = lookup_attribute ("trap_exit",
1217 DECL_ATTRIBUTES (current_function_decl));
1219 fprintf (stream, "trapa #%ld",
1220 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1221 else if (sh_cfun_interrupt_handler_p ())
1223 if (sh_cfun_resbank_handler_p ())
1224 fprintf (stream, "resbank\n");
1225 fprintf (stream, "rte");
1228 fprintf (stream, "rts");
1231 /* Output a nop if there's nothing in the delay slot. */
1232 if (dbr_sequence_length () == 0)
1233 fprintf (stream, "\n\tnop");
1237 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
/* "/u" is printed when a REG_BR_PROB note exists and its value is below
   half of REG_BR_PROB_BASE.  */
1239 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1240 fputs ("/u", stream);
1244 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1246 fputs ("\t! target: ", stream);
1247 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1251 x = mark_constant_pool_use (x);
1252 output_addr_const (stream, x);
1254 /* N.B.: %R / %S / %T adjust memory addresses by four.
1255 For SHMEDIA, that means they can be used to access the first and
1256 second 32 bit part of a 64 bit (or larger) value that
1257 might be held in floating point registers or memory.
1258 While they can be used to access 64 bit parts of a larger value
1259 held in general purpose registers, that won't work with memory -
1260 neither for fp registers, since the frxx names are used. */
1262 if (REG_P (x) || GET_CODE (x) == SUBREG)
1264 regno = true_regnum (x);
/* FP registers step to the next register (+1); all other registers are
   offset by SH_REG_LSW_OFFSET.  */
1265 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1266 fputs (reg_names[regno], (stream));
1270 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1271 sh_print_operand_address (stream, XEXP (x, 0));
1277 mode = GET_MODE (x);
1278 if (mode == VOIDmode)
1280 if (GET_MODE_SIZE (mode) >= 8)
1281 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1283 sh_print_operand (stream, sub, 0);
1285 output_operand_lossage ("invalid operand to %%R");
1289 if (REG_P (x) || GET_CODE (x) == SUBREG)
1291 regno = true_regnum (x);
/* FP registers keep their number (+0); all other registers are offset
   by SH_REG_MSW_OFFSET.  */
1292 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1293 fputs (reg_names[regno], (stream));
1297 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1298 sh_print_operand_address (stream, XEXP (x, 0));
1304 mode = GET_MODE (x);
1305 if (mode == VOIDmode)
1307 if (GET_MODE_SIZE (mode) >= 8)
1308 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1310 sh_print_operand (stream, sub, 0);
1312 output_operand_lossage ("invalid operand to %%S");
1316 /* Next word of a double. */
1317 switch (GET_CODE (x))
1320 fputs (reg_names[REGNO (x) + 1], (stream));
/* Addresses that are PRE_DEC or POST_INC are left untouched; any other
   address is advanced by 4 bytes before it is printed.  */
1323 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1324 && GET_CODE (XEXP (x, 0)) != POST_INC)
1325 x = adjust_address (x, SImode, 4);
1326 sh_print_operand_address (stream, XEXP (x, 0));
1334 gcc_assert (MEM_P (x));
1336 switch (GET_CODE (x))
1340 sh_print_operand (stream, x, 0);
1348 switch (GET_CODE (x))
1350 case PLUS: fputs ("add", stream); break;
1351 case MINUS: fputs ("sub", stream); break;
1352 case MULT: fputs ("mul", stream); break;
1353 case DIV: fputs ("div", stream); break;
1354 case EQ: fputs ("eq", stream); break;
1355 case NE: fputs ("ne", stream); break;
/* Each pair of mirrored comparisons shares one mnemonic, e.g. GT and LT
   both print "gt".  */
1356 case GT: case LT: fputs ("gt", stream); break;
1357 case GE: case LE: fputs ("ge", stream); break;
1358 case GTU: case LTU: fputs ("gtu", stream); break;
1359 case GEU: case LEU: fputs ("geu", stream); break;
1368 && GET_CODE (XEXP (x, 0)) == PLUS
1369 && (REG_P (XEXP (XEXP (x, 0), 1))
1370 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1371 fputc ('x', stream);
1377 switch (GET_MODE (x))
1379 case QImode: fputs (".b", stream); break;
1380 case HImode: fputs (".w", stream); break;
1381 case SImode: fputs (".l", stream); break;
1382 case SFmode: fputs (".s", stream); break;
1383 case DFmode: fputs (".d", stream); break;
1384 default: gcc_unreachable ();
1391 gcc_assert (MEM_P (x));
1395 switch (GET_CODE (x))
1399 sh_print_operand (stream, x, 0);
1400 fputs (", 0", stream);
1404 sh_print_operand (stream, XEXP (x, 0), 0);
1405 fputs (", ", stream);
1406 sh_print_operand (stream, XEXP (x, 1), 0);
/* The assert requires exact_log2 to succeed (num >= 0) before the bit
   position is printed.  */
1416 int num = exact_log2 (INTVAL (x));
1417 gcc_assert (num >= 0);
1418 fprintf (stream, "#%d", num);
/* Same as above, applied to the bitwise complement of the value.  */
1424 int num = exact_log2 (~INTVAL (x));
1425 gcc_assert (num >= 0);
1426 fprintf (stream, "#%d", num);
1431 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
/* "+ 1" skips the first character of the register name, which is then
   prefixed with "d".  */
1433 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1437 if (x == CONST0_RTX (GET_MODE (x)))
1439 fprintf ((stream), "r63");
1442 goto default_output;
1444 if (CONST_INT_P (x))
/* Masks the value to its low 16 bits and prints it unsigned.  */
1446 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1454 mode = GET_MODE (x);
1456 switch (GET_CODE (x))
1460 rtx inner = XEXP (x, 0);
1462 enum machine_mode inner_mode;
1464 /* We might see SUBREGs with vector mode registers inside. */
1465 if (GET_CODE (inner) == SUBREG
1466 && (GET_MODE_SIZE (GET_MODE (inner))
1467 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1468 && subreg_lowpart_p (inner))
1469 inner = SUBREG_REG (inner);
/* Constant inner operand: truncate it to the mode of X and continue at
   default_output.  */
1470 if (CONST_INT_P (inner))
1472 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1473 goto default_output;
1475 inner_mode = GET_MODE (inner);
1476 if (GET_CODE (inner) == SUBREG
1477 && (GET_MODE_SIZE (GET_MODE (inner))
1478 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1479 && REG_P (SUBREG_REG (inner)))
1481 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1482 GET_MODE (SUBREG_REG (inner)),
1483 SUBREG_BYTE (inner),
1485 inner = SUBREG_REG (inner);
1487 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1489 /* Floating point register pairs are always big endian;
1490 general purpose registers are 64 bit wide. */
1491 regno = REGNO (inner);
1492 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1493 - HARD_REGNO_NREGS (regno, mode))
1501 /* FIXME: We need this on SHmedia32 because reload generates
1502 some sign-extended HI or QI loads into DImode registers
1503 but, because Pmode is SImode, the address ends up with a
1504 subreg:SI of the DImode register. Maybe reload should be
1505 fixed so as to apply alter_subreg to such loads? */
1507 gcc_assert (trapping_target_operand (x, VOIDmode));
1508 x = XEXP (XEXP (x, 2), 0);
1509 goto default_output;
1511 gcc_assert (SUBREG_BYTE (x) == 0
1512 && REG_P (SUBREG_REG (x)));
/* In the FP register cases below, "+ 2" / "+ 1" drop leading characters
   of the register name so that the "mtrx", "fv", "fp" or "d" prefix
   takes their place.  */
1520 if (FP_REGISTER_P (regno)
1521 && mode == V16SFmode)
1522 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1523 else if (FP_REGISTER_P (REGNO (x))
1524 && mode == V4SFmode)
1525 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1527 && mode == V2SFmode)
1528 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1529 else if (FP_REGISTER_P (REGNO (x))
1530 && GET_MODE_SIZE (mode) > 4)
1531 fprintf ((stream), "d%s", reg_names[regno] + 1);
1533 fputs (reg_names[regno], (stream));
1537 output_address (XEXP (x, 0));
1542 fputc ('#', stream);
1543 output_addr_const (stream, x);
/* Evaluates to true exactly when CODE is one of the punctuation
   characters '.', '#', '@', ',', '$', '\'' or '>'.  */
1551 sh_print_operand_punct_valid_p (unsigned char code)
1553 return (code == '.' || code == '#' || code == '@' || code == ','
1554 || code == '$' || code == '\'' || code == '>');
1557 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Only UNSPEC rtxes are handled.  For most of the visible alternatives,
   element 0 of the UNSPEC vector is printed to FILE with
   output_addr_const and then followed by a relocation suffix such as
   "@GOT" or "@TPOFF", chosen by XINT (x, 1).
   NOTE(review): most `case' labels and the return statements are not
   visible in this listing (the embedded line numbers skip); confirm the
   suffix-to-UNSPEC mapping against the full source.  */
1559 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1561 if (GET_CODE (x) == UNSPEC)
1563 switch (XINT (x, 1))
1565 case UNSPEC_DATALABEL:
1566 fputs ("datalabel ", file);
1567 output_addr_const (file, XVECEXP (x, 0, 0));
1570 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1571 output_addr_const (file, XVECEXP (x, 0, 0));
1574 output_addr_const (file, XVECEXP (x, 0, 0));
1575 fputs ("@GOT", file);
1578 output_addr_const (file, XVECEXP (x, 0, 0));
1579 fputs ("@GOTOFF", file);
1582 output_addr_const (file, XVECEXP (x, 0, 0));
1583 fputs ("@PLT", file);
1586 output_addr_const (file, XVECEXP (x, 0, 0));
1587 fputs ("@GOTPLT", file);
1590 output_addr_const (file, XVECEXP (x, 0, 0));
1591 fputs ("@DTPOFF", file);
1593 case UNSPEC_GOTTPOFF:
1594 output_addr_const (file, XVECEXP (x, 0, 0));
1595 fputs ("@GOTTPOFF", file);
1598 output_addr_const (file, XVECEXP (x, 0, 0));
1599 fputs ("@TPOFF", file);
1604 /* LPCS stands for Label for PIC Call Site. */
1605 targetm.asm_out.generate_internal_label (name, "LPCS",
1606 INTVAL (XVECEXP (x, 0, 0)));
1607 assemble_name (file, name);
1610 case UNSPEC_EXTRACT_S16:
1611 case UNSPEC_EXTRACT_U16:
/* Element 0 is the value and element 1 the shift count.  A non-zero
   shift is printed as " >> shift" after the value, and the expression
   is finished with " & 65535)".  */
1615 val = XVECEXP (x, 0, 0);
1616 shift = XVECEXP (x, 0, 1);
1618 if (shift != const0_rtx)
1620 if (GET_CODE (val) == CONST
1621 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1624 output_addr_const (file, val);
1628 output_addr_const (file, val);
1629 if (shift != const0_rtx)
1631 fputs (" >> ", file);
1632 output_addr_const (file, shift);
1635 fputs (" & 65535)", file);
1639 output_addr_const (file, XVECEXP (x, 0, 0));
1641 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1644 output_addr_const (file, XVECEXP (x, 0, 1));
1648 output_addr_const (file, XVECEXP (x, 0, 1));
1650 case UNSPEC_PCREL_SYMOFF:
1651 output_addr_const (file, XVECEXP (x, 0, 0));
1653 output_addr_const (file, XVECEXP (x, 0, 1));
/* NOTE(review): `.' is presumably the assembler's current-location
   symbol, making the second operand PC-relative -- confirm.  */
1654 fputs ("-.)", file);
1665 /* Encode symbol attributes of a SYMBOL_REF into its
1666 SYMBOL_REF_FLAGS. */
/* DECL, RTL and FIRST are forwarded unchanged to
   default_encode_section_info before the SH-specific flag is applied.  */
1668 sh_encode_section_info (tree decl, rtx rtl, int first)
1670 default_encode_section_info (decl, rtl, first);
/* Function declarations accepted by sh2a_function_vector_p get
   SYMBOL_FLAG_FUNCVEC_FUNCTION or-ed into the flags of XEXP (rtl, 0),
   but only when TARGET_SH2A is set.  */
1672 if (TREE_CODE (decl) == FUNCTION_DECL
1673 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1674 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1677 /* Prepare operands for a move define_expand; specifically, one of the
1678 operands must be in a register. */
/* OPERANDS[0] is the destination and OPERANDS[1] the source of the move
   (see the MEM_P (operands[0]) tests and the copies made of operands[1]
   below); either entry may be replaced in place.  MODE is the mode of
   the move.  The visible code handles, in order: PIC legitimization of
   symbolic sources, forcing the source into a register before reload,
   and expansion of TLS symbol references.
   NOTE(review): this listing omits lines (the embedded line numbers
   skip), including braces, `case'/`break' lines and the return
   statement; confirm details against the full source.  */
1680 prepare_move_operands (rtx operands[], enum machine_mode mode)
/* The legitimize_pic_address calls below are reached only for SImode or
   DImode moves, and not for pointer-mode moves of a TLS symbol.  */
1682 if ((mode == SImode || mode == DImode)
1684 && ! ((mode == Pmode || mode == ptr_mode)
1685 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1688 if (SYMBOLIC_CONST_P (operands[1]))
1690 if (MEM_P (operands[0]))
1691 operands[1] = force_reg (Pmode, operands[1]);
1692 else if (TARGET_SHMEDIA
1693 && GET_CODE (operands[1]) == LABEL_REF
1694 && target_reg_operand (operands[0], mode))
1698 temp = (!can_create_pseudo_p ()
1700 : gen_reg_rtx (Pmode));
1701 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1704 else if (GET_CODE (operands[1]) == CONST
1705 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1706 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
/* Source is "symbol + addend": legitimize the symbol part, then add the
   addend back with expand_binop.  */
1708 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1709 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1711 operands[1] = expand_binop (mode, add_optab, temp,
1712 XEXP (XEXP (operands[1], 0), 1),
1713 (!can_create_pseudo_p ()
1715 : gen_reg_rtx (Pmode)),
1716 0, OPTAB_LIB_WIDEN);
1720 if (! reload_in_progress && ! reload_completed)
1722 /* Copy the source to a register if both operands aren't registers. */
1723 if (! register_operand (operands[0], mode)
1724 && ! sh_register_operand (operands[1], mode))
1725 operands[1] = copy_to_mode_reg (mode, operands[1]);
1727 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1729 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1730 except that we can't use that function because it is static. */
1731 rtx new_rtx = change_address (operands[0], mode, 0);
1732 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1733 operands[0] = new_rtx;
1736 /* This case can happen while generating code to move the result
1737 of a library call to the target. Reject `st r0,@(rX,rY)' because
1738 reload will fail to find a spill register for rX, since r0 is already
1739 being used for the source. */
1741 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1742 && MEM_P (operands[0])
1743 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1744 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1745 operands[1] = copy_to_mode_reg (mode, operands[1]);
1748 if (mode == Pmode || mode == ptr_mode)
1751 enum tls_model tls_kind;
/* Split "TLS symbol + addend": OPC keeps the addend and OP1 the bare
   symbol.  */
1755 if (GET_CODE (op1) == CONST
1756 && GET_CODE (XEXP (op1, 0)) == PLUS
1757 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1760 opc = XEXP (XEXP (op1, 0), 1);
1761 op1 = XEXP (XEXP (op1, 0), 0);
/* TLS references are expanded only before reload; the statements that
   follow emit a different sequence for each TLS model.  */
1766 if (! reload_in_progress && ! reload_completed
1767 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1769 rtx tga_op1, tga_ret, tmp, tmp2;
1772 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1773 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1774 || tls_kind == TLS_MODEL_INITIAL_EXEC))
/* NOTE(review): the comment opened on the next line is not closed in
   this listing (its last line is missing).  */
1776 /* Don't schedule insns for getting GOT address when
1777 the first scheduling is enabled, to avoid spill
1779 if (flag_schedule_insns)
1780 emit_insn (gen_blockage ());
1781 emit_insn (gen_GOTaddr2picreg ());
1782 emit_use (gen_rtx_REG (SImode, PIC_REG));
1783 if (flag_schedule_insns)
1784 emit_insn (gen_blockage ());
1789 case TLS_MODEL_GLOBAL_DYNAMIC:
1790 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1791 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1792 tmp = gen_reg_rtx (Pmode);
1793 emit_move_insn (tmp, tga_ret);
1797 case TLS_MODEL_LOCAL_DYNAMIC:
1798 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1799 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1801 tmp = gen_reg_rtx (Pmode);
1802 emit_move_insn (tmp, tga_ret);
1804 if (register_operand (op0, Pmode))
1807 tmp2 = gen_reg_rtx (Pmode);
1809 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1813 case TLS_MODEL_INITIAL_EXEC:
1814 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1815 tmp = gen_sym2GOTTPOFF (op1);
1816 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1820 case TLS_MODEL_LOCAL_EXEC:
1821 tmp2 = gen_reg_rtx (Pmode);
1822 emit_insn (gen_store_gbr (tmp2));
1823 tmp = gen_reg_rtx (Pmode);
1824 emit_insn (gen_symTPOFF2reg (tmp, op1));
1826 if (register_operand (op0, Pmode))
1829 op1 = gen_reg_rtx (Pmode);
1831 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1838 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1844 /* Implement the canonicalize_comparison target hook for the combine
1845 pass. For the target hook this function is invoked via
1846 sh_canonicalize_comparison. This function is also re-used to
1847 canonicalize comparisons in cbranch pattern expanders. */
/* CMP, OP0 and OP1 are references and are updated in place.  MODE may be
   VOIDmode, in which case it is taken from OP0 and then from OP1.
   OP0_PRESERVE_VALUE is tested before the operands are swapped; the
   statement it guards is not visible in this listing (presumably the
   swap is skipped -- TODO confirm).
   NOTE(review): this listing omits lines (the embedded line numbers
   skip), including the early-exit statements after the VOIDmode and
   MODE_FLOAT tests.  */
1849 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1850 enum machine_mode mode,
1851 bool op0_preserve_value)
1853 /* When invoked from within the combine pass the mode is not specified,
1854 so try to get it from one of the operands. */
1855 if (mode == VOIDmode)
1856 mode = GET_MODE (op0);
1857 if (mode == VOIDmode)
1858 mode = GET_MODE (op1);
1860 // We need to have a mode to do something useful here.
1861 if (mode == VOIDmode)
1864 // Currently, we don't deal with floats here.
1865 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1868 // Make sure that the constant operand is the second operand.
1869 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1871 if (op0_preserve_value)
1874 std::swap (op0, op1);
1875 cmp = swap_condition (cmp);
1878 if (CONST_INT_P (op1))
1880 /* Try to adjust the constant operand in such a way that available
1881 comparison insns can be utilized better and the constant can be
1882 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1884 const HOST_WIDE_INT val = INTVAL (op1);
1886 /* x > -1 --> x >= 0
1887 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1889 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1890 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1892 cmp = cmp == GT ? GE : LT;
1893 op1 = gen_int_mode (val + 1, mode);
/* NOTE(review): the opening line of the comment that the next two lines
   belong to is missing from this listing.  */
1897 x >= 0x80 --> x > 0x7F
1899 x < 0x80 --> x <= 0x7F */
1900 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1902 cmp = cmp == GE ? GT : LE;
1903 op1 = gen_int_mode (val - 1, mode);
1906 /* unsigned x >= 1 --> x != 0
1907 unsigned x < 1 --> x == 0 */
1908 else if (val == 1 && (cmp == GEU || cmp == LTU))
1910 cmp = cmp == GEU ? NE : EQ;
1911 op1 = CONST0_RTX (mode);
1914 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1915 unsigned x < 0x80 --> unsigned x <= 0x7F */
1916 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1918 cmp = cmp == GEU ? GTU : LEU;
1919 op1 = gen_int_mode (val - 1, mode);
1922 /* unsigned x > 0 --> x != 0
1923 unsigned x <= 0 --> x == 0 */
1924 else if (val == 0 && (cmp == GTU || cmp == LEU))
1925 cmp = cmp == GTU ? NE : EQ;
1927 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1928 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1929 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1930 && val == 0x7FFFFFFF)
1932 cmp = cmp == GTU ? LT : GE;
1936 /* unsigned x >= 0x80000000 --> signed x < 0
1937 unsigned x < 0x80000000 --> signed x >= 0 */
/* The constant 0x80000000 is spelled as 0x7FFFFFFF + 1, computed in
   unsigned HOST_WIDE_INT.  */
1938 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1939 && (unsigned HOST_WIDE_INT)val
1940 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1942 cmp = cmp == GEU ? LT : GE;
1948 /* This function implements the canonicalize_comparison target hook.
1949 This wrapper around the internally used sh_canonicalize_comparison
1950 function is needed to do the enum rtx_code <-> int conversion.
1951 Target hooks cannot use enum rtx_code in its definition. */
/* CODE, OP0 and OP1 are in/out pointers: *CODE is read, canonicalized
   and written back, and *OP0 / *OP1 are passed by reference to the
   overload taking enum rtx_code.  */
1953 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1954 bool op0_preserve_value)
/* Round-trip *CODE through a local enum rtx_code; VOIDmode is passed as
   the mode argument.  */
1956 enum rtx_code tmp_code = (enum rtx_code)*code;
1957 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1958 VOIDmode, op0_preserve_value);
1959 *code = (int)tmp_code;
/* Stores INVALID_REGNUM through P2.
   NOTE(review): judging by the name this reports the target's fixed
   condition code registers, but the store through P1 and the return
   statement are not visible in this listing (the embedded line numbers
   skip) -- confirm against the full source.  */
1963 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1966 *p2 = INVALID_REGNUM;
/* Prepares OPERANDS for a conditional branch in MODE: the comparison is
   canonicalized with sh_canonicalize_comparison and OPERANDS[1] /
   OPERANDS[2], the values being compared, are forced into registers
   where the tests below require it.  If COMPARISON is
   LAST_AND_UNUSED_RTX_CODE the code is taken from OPERANDS[0].
   OPERANDS[4] may supply a scratch register.  The callers visible in
   this file assign the result to their comparison code.
   NOTE(review): this listing omits lines (the embedded line numbers
   skip), including the return statement and part of the
   sh_canonicalize_comparison call.  */
1971 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1972 enum rtx_code comparison)
1974 /* The scratch reg is only available when this is invoked from within
1975 the cbranchdi4_i splitter, through expand_cbranchdi4. */
1976 rtx scratch = NULL_RTX;
1978 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1979 comparison = GET_CODE (operands[0]);
1981 scratch = operands[4];
1983 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1986 /* Notice that this function is also invoked after reload by
1987 the cbranchdi4_i pattern, through expand_cbranchdi4. */
1988 rtx op1 = operands[1];
1990 if (can_create_pseudo_p ())
1991 operands[1] = force_reg (mode, op1);
1992 /* When we are handling DImode comparisons, we want to keep constants so
1993 that we can optimize the component comparisons; however, memory loads
1994 are better issued as a whole so that they can be scheduled well.
1995 SImode equality comparisons allow I08 constants, but only when they
1996 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1997 into a register, that register might as well be r0, and we allow the
1998 constant. If it is already in a register, this is likely to be
1999 allocated to a different hard register, thus we load the constant into
2000 a register unless it is zero. */
2001 if (!REG_P (operands[2])
2002 && (!CONST_INT_P (operands[2])
2003 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2004 && ((comparison != EQ && comparison != NE)
2005 || (REG_P (op1) && REGNO (op1) != R0_REG)
2006 || !satisfies_constraint_I08 (operands[2])))))
/* Prefer the scratch register when one is set and its mode matches;
   otherwise fall back to a fresh pseudo if one may still be created.  */
2008 if (scratch && GET_MODE (scratch) == mode)
2010 emit_move_insn (scratch, operands[2]);
2011 operands[2] = scratch;
2013 else if (can_create_pseudo_p ())
2014 operands[2] = force_reg (mode, operands[2]);
/* Emits an SImode comparison of OPERANDS[1] with OPERANDS[2] that sets
   the register returned by get_t_reg_rtx, followed by a conditional jump
   to OPERANDS[3].  The operands are first run through
   prepare_cbranch_operands.  A non-negative PROBABILITY is attached to
   the jump as a REG_BR_PROB note.
   NOTE(review): the `switch' line and braces are not visible in this
   listing (the embedded line numbers skip).  */
2020 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2022 rtx (*branch_expander) (rtx) = gen_branch_true;
2023 comparison = prepare_cbranch_operands (operands, SImode, comparison);
/* These codes are emitted as the reversed comparison combined with
   gen_branch_false instead of gen_branch_true.  */
2026 case NE: case LT: case LE: case LTU: case LEU:
2027 comparison = reverse_condition (comparison);
2028 branch_expander = gen_branch_false;
2031 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
2032 gen_rtx_fmt_ee (comparison, SImode,
2033 operands[1], operands[2])));
2034 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2035 if (probability >= 0)
2036 add_int_reg_note (jump, REG_BR_PROB, probability);
2039 /* ??? How should we distribute probabilities when more than one branch
2040 is generated. So far we only have some ad-hoc observations:
2041 - If the operands are random, they are likely to differ in both parts.
2042 - If comparing items in a hash chain, the operands are random or equal;
2043 operation should be EQ or NE.
2044 - If items are searched in an ordered tree from the root, we can expect
2045 the highpart to be unequal about half of the time; operation should be
2046 an inequality comparison, operands non-constant, and overall probability
2047 about 50%. Likewise for quicksort.
2048 - Range checks will be often made against constants. Even if we assume for
2049 simplicity an even distribution of the non-constant operand over a
2050 sub-range here, the same probability could be generated with differently
2051 wide sub-ranges - as long as the ratio of the part of the subrange that
2052 is before the threshold to the part that comes after the threshold stays
2053 the same. Thus, we can't really tell anything here;
2054 assuming random distribution is at least simple.
/* Expand a DImode conditional branch into up to three SImode branches
   (msw_taken, msw_skip and lsw_taken), each emitted by expand_cbranchsi4.
   OPERANDS[1] and OPERANDS[2] are the compared values, OPERANDS[3] is the
   branch target and OPERANDS[4] is a scratch register.  COMPARISON is the
   comparison code; codes that are not handled make the function return
   false.  */
2057 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2059 enum rtx_code msw_taken, msw_skip, lsw_taken;
2060 rtx_code_label *skip_label = NULL;
2061 rtx op1h, op1l, op2h, op2l;
2064 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2065 rtx scratch = operands[4];
2067 comparison = prepare_cbranch_operands (operands, DImode, comparison);
/* Split both operands into their SImode high and low parts.  */
2068 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2069 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2070 op1l = gen_lowpart (SImode, operands[1]);
2071 op2l = gen_lowpart (SImode, operands[2]);
/* LAST_AND_UNUSED_RTX_CODE marks a branch that is not going to be
   emitted.  */
2072 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2073 prob = split_branch_probability;
2074 rev_prob = REG_BR_PROB_BASE - prob;
2077 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2078 That costs 1 cycle more when the first branch can be predicted taken,
2079 but saves us mispredicts because only one branch needs prediction.
2080 It also enables generating the cmpeqdi_t-1 pattern. */
2082 if (TARGET_CMPEQDI_T)
2084 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2085 emit_jump_insn (gen_branch_true (operands[3]));
2092 // If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
2093 msw_skip_prob = rev_prob;
2094 if (REG_BR_PROB_BASE <= 65535)
2095 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2101 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2102 / ((gcov_type) prob << 32)))
2108 if (TARGET_CMPEQDI_T)
2110 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2111 emit_jump_insn (gen_branch_false (operands[3]));
2115 msw_taken_prob = prob;
2120 msw_taken = comparison;
2121 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2123 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2124 msw_skip = swap_condition (msw_taken);
2128 if (op2l == CONST0_RTX (SImode))
2129 msw_taken = comparison;
2132 msw_taken = comparison == GE ? GT : GTU;
2133 msw_skip = swap_condition (msw_taken);
2138 msw_taken = comparison;
2139 if (op2l == CONST0_RTX (SImode))
2141 msw_skip = swap_condition (msw_taken);
2145 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2146 msw_taken = comparison;
2150 if (comparison == LE)
2152 else if (op2h != CONST0_RTX (SImode))
2156 msw_skip = swap_condition (LTU);
2159 msw_skip = swap_condition (msw_taken);
2162 default: return false;
/* Count the branches that are going to be emitted.  */
2164 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2165 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2166 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2167 if (comparison != EQ && comparison != NE && num_branches > 1)
2169 if (!CONSTANT_P (operands[2])
2170 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2171 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2173 msw_taken_prob = prob / 2U;
2175 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2176 lsw_taken_prob = prob;
2180 msw_taken_prob = prob;
2181 msw_skip_prob = REG_BR_PROB_BASE;
2182 /* ??? If we have a constant op2h, should we use that when
2183 calculating lsw_taken_prob? */
2184 lsw_taken_prob = prob;
2189 operands[4] = NULL_RTX;
2190 if (reload_completed
2191 && ! arith_reg_or_0_operand (op2h, SImode)
2192 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2193 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2194 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2196 emit_move_insn (scratch, operands[2]);
2197 operands[2] = scratch;
2199 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2200 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2201 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2203 rtx taken_label = operands[3];
2205 /* Operands were possibly modified, but msw_skip doesn't expect this.
2206 Always use the original ones. */
2207 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2211 if (reload_completed
2212 && ! arith_reg_or_0_operand (op2h, SImode)
2213 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2215 emit_move_insn (scratch, operands[2]);
2216 operands[2] = scratch;
/* Branch to a fresh skip label, then restore the real target.  */
2220 operands[3] = skip_label = gen_label_rtx ();
2221 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2222 operands[3] = taken_label;
2226 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2228 if (reload_completed
2229 && ! arith_reg_or_0_operand (op2l, SImode)
2230 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2232 emit_move_insn (scratch, operands[2]);
2233 operands[2] = scratch;
2235 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
/* The msw_skip branch, if one was emitted, lands here.  */
2237 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2238 emit_label (skip_label);
2242 /* Given an operand, return 1 if the evaluated operand plugged into an
2243 if_then_else will result in a branch_true, 0 if branch_false, or
2244 -1 if neither applies. The truth table goes like this:
2246 op | cmpval | code | result
2247 ---------+--------+---------+--------------------
2248 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2249 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2250 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2251 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2252 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2253 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2254 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2255 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2257 sh_eval_treg_value (rtx op)
/* OP may itself be a plain T or negated-T operand.  */
2259 if (t_reg_operand (op, GET_MODE (op)))
2261 if (negt_reg_operand (op, GET_MODE (op)))
/* Otherwise only EQ / NE against a constant integer is examined.  */
2264 rtx_code code = GET_CODE (op);
2265 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2268 int cmpop = code == EQ ? 1 : 0;
2269 int cmpval = INTVAL (XEXP (op, 1));
/* The constant compared against must be 0 or 1.  */
2270 if (cmpval != 0 && cmpval != 1)
/* Classify the first operand of the comparison as T or negated T.  */
2274 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2276 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
/* Combine as shown in the "result" column of the table above.  */
2281 return t ^ (cmpval == cmpop);
2284 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
/* For a floating point MODE on SH4 or SH2A, INSN is wrapped in a PARALLEL
   together with a USE of the fpscr rtx and emitted with emit_sf_insn when
   MODE is SFmode, or with emit_df_insn for any other mode.  */
2287 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2289 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2291 insn = gen_rtx_PARALLEL (VOIDmode,
2293 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2294 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2300 /* Prepare the operands for an scc instruction; make sure that the
2301 compare has been done and the result is in T_REG. */
/* CODE is the comparison code, OP0 and OP1 are the operands compared.  */
2303 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2305 rtx t_reg = get_t_reg_rtx ();
2306 enum rtx_code oldcode = code;
2307 enum machine_mode mode;
2309 /* First need a compare insn. */
2313 /* It isn't possible to handle this case. */
2330 if (code != oldcode)
/* Use the mode of OP0, or that of OP1 when OP0 has VOIDmode.  */
2337 mode = GET_MODE (op0);
2338 if (mode == VOIDmode)
2339 mode = GET_MODE (op1);
/* OP0 always goes into a register; OP1 only under the conditions
   below.  */
2341 op0 = force_reg (mode, op0);
2342 if ((code != EQ && code != NE
2343 && (op1 != const0_rtx
2344 || code == GTU || code == GEU || code == LTU || code == LEU))
2345 || (mode == DImode && op1 != const0_rtx)
2346 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2347 op1 = force_reg (mode, op1);
/* Set T_REG to the result of the comparison of OP0 with OP1.  */
2349 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2350 gen_rtx_fmt_ee (code, SImode, op0, op1)),
/* SHmedia only (see the assert below): emit cstore4_media insns that
   compute a comparison of OP0 and OP1 into a new SImode pseudo, and return
   a comparison rtx against const0_rtx.  MODE and CODE describe the
   comparison that was requested.  */
2355 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2358 rtx target = gen_reg_rtx (SImode);
2361 gcc_assert (TARGET_SHMEDIA);
2370 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2371 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
/* In this case the reversed condition is what gets computed.  */
2381 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2382 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
/* Extend the SImode result into a new DImode pseudo.  */
2400 rtx t2 = gen_reg_rtx (DImode);
2401 emit_insn (gen_extendsidi2 (t2, target));
2405 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2408 /* Called from the md file, set up the operands of a compare instruction. */
/* OPERANDS[0] is the comparison rtx, OPERANDS[1] and OPERANDS[2] are the
   values compared and OPERANDS[3] is the branch target.  MODE is the mode
   of the comparison.  */
2410 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2412 enum rtx_code code = GET_CODE (operands[0]);
2413 enum rtx_code branch_code;
2414 rtx op0 = operands[1];
2415 rtx op1 = operands[2];
2417 bool need_ccmpeq = false;
2419 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2421 op0 = force_reg (mode, op0);
2422 op1 = force_reg (mode, op1);
2426 if (code != EQ || mode == DImode)
2428 /* Force args into regs, since we can't use constants here. */
2429 op0 = force_reg (mode, op0);
2430 if (op1 != const0_rtx || code == GTU || code == GEU)
2431 op1 = force_reg (mode, op1);
2435 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2438 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2439 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
/* Swap the operands and adjust the condition accordingly.  */
2441 tem = op0, op0 = op1, op1 = tem;
2442 code = swap_condition (code);
2445 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2448 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2453 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2454 to EQ/GT respectively. */
2455 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2472 branch_code = reverse_condition (code);
2478 insn = gen_rtx_SET (VOIDmode,
2480 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2482 sh_emit_set_t_insn (insn, mode);
2484 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
/* When the comparison that was emitted is CODE itself the branch is
   taken on T set; the branch_false form is for the reversed case.  */
2486 if (branch_code == code)
2487 emit_jump_insn (gen_branch_true (operands[3]));
2489 emit_jump_insn (gen_branch_false (operands[3]));
/* Emit a comparison of OPERANDS[2] with OPERANDS[3], using the rtx code
   of OPERANDS[1], through sh_emit_scc_to_t, and store the resulting T bit
   or its negation (movnegt) into OPERANDS[0].  MODE is the mode of the
   operands compared.  */
2493 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2495 enum rtx_code code = GET_CODE (operands[1]);
2496 rtx op0 = operands[2];
2497 rtx op1 = operands[3];
2498 rtx_code_label *lab = NULL;
2499 bool invert = false;
/* OP0 always goes into a register; OP1 only under the conditions
   below.  */
2502 op0 = force_reg (mode, op0);
2503 if ((code != EQ && code != NE
2504 && (op1 != const0_rtx
2505 || code == GTU || code == GEU || code == LTU || code == LEU))
2506 || (mode == DImode && op1 != const0_rtx)
2507 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2508 op1 = force_reg (mode, op1);
2510 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
/* For floating point, LT and LE are handled by swapping the condition
   and the operands.  */
2512 if (code == LT || code == LE)
2514 code = swap_condition (code);
2515 tem = op0, op0 = op1, op1 = tem;
/* An EQ test is emitted first, with a branch over what follows to LAB
   when it holds.  */
2521 lab = gen_label_rtx ();
2522 sh_emit_scc_to_t (EQ, op0, op1);
2523 emit_jump_insn (gen_branch_true (lab));
2540 sh_emit_scc_to_t (code, op0, op1);
/* Copy either the negated or the plain T bit into the destination.  */
2544 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2546 emit_move_insn (operands[0], get_t_reg_rtx ());
2549 /* Functions to output assembly code. */
2551 /* Return a sequence of instructions to perform DI or DF move.
2553 Since the SH cannot move a DI or DF in one instruction, we have
2554 to take care when we see overlapping source and dest registers. */
/* OPERANDS[0] is the destination and OPERANDS[1] the source; MODE is the
   mode of the move.  INSN is unused.  */
2556 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2557 enum machine_mode mode)
2559 rtx dst = operands[0];
2560 rtx src = operands[1];
/* Destination addressed with pre-decrement.  */
2563 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2564 return "mov.l %T1,%0" "\n"
/* Register to register move.  */
2567 if (register_operand (dst, mode)
2568 && register_operand (src, mode))
2570 if (REGNO (src) == MACH_REG)
2571 return "sts mach,%S0" "\n"
2574 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2575 when mov.d r1,r0 do r1->r0 then r2->r1. */
2576 if (REGNO (src) + 1 == REGNO (dst))
2577 return "mov %T1,%T0" "\n"
2580 return "mov %1,%0" "\n"
/* Constant source: the %S0 word is set to -1 for a negative constant and
   to 0 otherwise; the constant itself is moved into the %R0 word.  */
2583 else if (CONST_INT_P (src))
2585 if (INTVAL (src) < 0)
2586 output_asm_insn ("mov #-1,%S0", operands);
2588 output_asm_insn ("mov #0,%S0", operands);
2590 return "mov %1,%R0";
/* Load from memory: determine the register used in the address (ptrreg)
   and the first destination register (dreg).  */
2592 else if (MEM_P (src))
2595 int dreg = REGNO (dst);
2596 rtx inside = XEXP (src, 0);
2598 switch (GET_CODE (inside))
2601 ptrreg = REGNO (inside);
2605 ptrreg = subreg_regno (inside);
2609 ptrreg = REGNO (XEXP (inside, 0));
2610 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2611 an offsettable address. Unfortunately, offsettable addresses use
2612 QImode to check the offset, and a QImode offsettable address
2613 requires r0 for the other operand, which is not currently
2614 supported, so we can't use the 'o' constraint.
2615 Thus we must check for and handle r0+REG addresses here.
2616 We punt for now, since this is likely very rare. */
2617 gcc_assert (!REG_P (XEXP (inside, 1)));
2621 return "mov.l %1,%0" "\n"
2624 return "mov.l %1,%0" "\n"
2630 /* Work out the safe way to copy. Copy into the second half first. */
2632 return "mov.l %T1,%T0" "\n"
2636 return "mov.l %1,%0" "\n"
2640 /* Print an instruction which would have gone into a delay slot after
2641 another instruction, but couldn't because the other instruction expanded
2642 into a sequence where putting the slot insn at the end wouldn't work. */
/* INSN is a sequence; its element 1 is the insn that gets printed.  */
2644 print_slot (rtx insn)
2646 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
/* Mark the slot insn as deleted -- presumably so that it is not output a
   second time; confirm against final.  */
2648 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
/* Output the assembly for the far jump INSN.  The jump template is chosen
   from the distance to the branch destination, flag_pic and the target
   variant, and a constant is emitted after the jump as .long or .word
   under a label preceded by .align 2.  The register used by the template
   is taken from a preceding indirect_jump_scratch insn when there is one;
   otherwise r13 is used and saved and restored around the jump, either in
   macl or on the stack.  */
2652 output_far_jump (rtx_insn *insn, rtx op)
2654 struct { rtx lab, reg, op; } this_jmp;
2655 rtx braf_base_lab = NULL_RTX;
2658 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2661 this_jmp.lab = gen_label_rtx ();
2665 && offset - get_attr_length (insn) <= 32766)
2668 jump = "mov.w %O0,%1" "\n"
2677 jump = "mov.l %O0,%1" "\n"
2680 jump = "mov.l r0,@-r15" "\n"
2682 " mov.l @r0,%1" "\n"
2684 " mov.l @r15+,r0" "\n"
2688 jump = "mov.l %O0,%1" "\n"
2691 /* If we have a scratch register available, use it. */
2692 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2693 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2695 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2696 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2697 jump = "mov.l r1,@-r15" "\n"
2699 " mov.l @r0,r1" "\n"
2701 " mov.l @r15+,r1" "\n"
2703 output_asm_insn (jump, &this_jmp.lab);
2704 if (dbr_sequence_length ())
2705 print_slot (final_sequence);
2707 output_asm_insn ("nop", 0);
2711 /* Output the delay slot insn first if any. */
2712 if (dbr_sequence_length ())
2713 print_slot (final_sequence);
2715 this_jmp.reg = gen_rtx_REG (SImode, 13);
2716 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2717 Fortunately, MACL is fixed and call-clobbered, and we never
2718 need its value across jumps, so save r13 in it instead of in
2721 output_asm_insn ("lds r13,macl", 0);
2723 output_asm_insn ("mov.l r13,@-r15", 0);
2724 output_asm_insn (jump, &this_jmp.lab);
2726 output_asm_insn ("sts macl,r13", 0);
2728 output_asm_insn ("mov.l @r15+,r13", 0);
2730 if (far && flag_pic && TARGET_SH2)
2732 braf_base_lab = gen_label_rtx ();
2733 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2734 CODE_LABEL_NUMBER (braf_base_lab));
2737 output_asm_insn (".align 2", 0);
2738 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2740 if (far && flag_pic)
2743 this_jmp.lab = braf_base_lab;
2744 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2747 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2751 /* Local label counter, used for constants in the pool and inside
2752 pattern branches. */
2753 static int lf = 100;
2755 /* Output code for ordinary branches. */
/* The form of the output is selected by the length attribute of INSN.
   In the short form a nonzero LOGIC gives bt and a zero LOGIC gives bf,
   branching to OPERANDS[0].  The longer forms branch around a bra to the
   real destination using the opposite condition.  */
2757 output_branch (int logic, rtx insn, rtx *operands)
2759 switch (get_attr_length (insn))
2762 /* This can happen if filling the delay slot has caused a forward
2763 branch to exceed its range (we could reverse it, but only
2764 when we know we won't overextend other branches; this should
2765 best be handled by relaxation).
2766 It can also happen when other condbranches hoist delay slot insn
2767 from their destination, thus leading to code size increase.
2768 But the branch will still be in the range -4092..+4098 bytes. */
2772 /* The call to print_slot will clobber the operands. */
2773 rtx op0 = operands[0];
2775 /* If the instruction in the delay slot is annulled (true), then
2776 there is no delay slot where we can put it now. The only safe
2777 place for it is after the label. final will do that by default. */
2780 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2781 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
/* Inverted branch with delay slot over the bra, to local label LF<label>;
   the slot insn is printed right after it.  */
2783 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2784 ASSEMBLER_DIALECT ? "/" : ".", label);
2785 print_slot (final_sequence);
/* Same, without a delay slot.  */
2788 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2790 output_asm_insn ("bra\t%l0", &op0);
2791 fprintf (asm_out_file, "\tnop\n");
2792 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2796 /* When relaxing, handle this like a short branch. The linker
2797 will fix it up if it still doesn't fit after relaxation. */
2799 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2801 /* These are for SH2e, in which we have to account for the
2802 extra nop because of the hardware bug in annulled branches. */
2808 gcc_assert (!final_sequence
2809 || !(INSN_ANNULLED_BRANCH_P
2810 (XVECEXP (final_sequence, 0, 0))));
2811 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2813 ASSEMBLER_DIALECT ? "/" : ".", label);
2814 fprintf (asm_out_file, "\tnop\n");
2815 output_asm_insn ("bra\t%l0", operands);
2816 fprintf (asm_out_file, "\tnop\n");
2817 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2821 /* When relaxing, fall through. */
2826 sprintf (buffer, "b%s%ss\t%%l0",
2828 ASSEMBLER_DIALECT ? "/" : ".");
2829 output_asm_insn (buffer, &operands[0]);
2834 /* There should be no longer branches now - that would
2835 indicate that something has destroyed the branches set
2836 up in machine_dependent_reorg. */
2841 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2842 fill in operand 9 as a label to the successor insn.
2843 We try to use jump threading where possible.
2844 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2845 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2846 follow jmp and bt, if the address is in range. */
2848 output_branchy_insn (enum rtx_code code, const char *templ,
2849 rtx_insn *insn, rtx *operands)
2851 rtx_insn *next_insn = NEXT_INSN (insn);
/* Look at a directly following conditional or unconditional jump.  */
2853 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2855 rtx src = SET_SRC (PATTERN (next_insn));
2856 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2858 /* Following branch not taken */
/* Put a new label after the jump and record its address.  */
2859 operands[9] = gen_label_rtx ();
2860 emit_label_after (operands[9], next_insn);
2861 INSN_ADDRESSES_NEW (operands[9],
2862 INSN_ADDRESSES (INSN_UID (next_insn))
2863 + get_attr_length (next_insn));
/* The jump is assumed taken: use its destination if it is within the
   offset range checked below.  */
2868 int offset = (branch_dest (next_insn)
2869 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2870 if (offset >= -252 && offset <= 258)
2872 if (GET_CODE (src) == IF_THEN_ELSE)
2874 src = XEXP (src, 1);
/* Otherwise operand 9 becomes a new label placed right after INSN.  */
2880 operands[9] = gen_label_rtx ();
2881 emit_label_after (operands[9], insn);
2882 INSN_ADDRESSES_NEW (operands[9],
2883 INSN_ADDRESSES (INSN_UID (insn))
2884 + get_attr_length (insn));
/* Output the sequence for the IEEE ccmpeq insn INSN through
   output_branchy_insn, passing NE as the code to match against a following
   jump.  The template starts with a bt to the label in operand 9.  */
2889 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2891 return output_branchy_insn (NE, "bt %l9" "\n"
2896 /* Output the start of the assembler file. */
2898 sh_file_start (void)
/* Start with the generic file prologue.  */
2900 default_file_start ();
2903 /* We need to show the text section with the proper
2904 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2905 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2906 will complain. We can teach GAS specifically about the
2907 default attributes for our choice of text section, but
2908 then we would have to change GAS again if/when we change
2909 the text section name. */
2910 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2912 /* Switch to the data section so that the coffsem symbol
2913 isn't in the text section. */
2914 switch_to_section (data_section);
/* Emit the .little directive for little endian targets.  */
2916 if (TARGET_LITTLE_ENDIAN)
2917 fputs ("\t.little\n", asm_out_file);
/* Emit the .mode directive and, for SHmedia, the .abi directive with the
   value 64 or 32.  */
2921 if (TARGET_SHCOMPACT)
2922 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2923 else if (TARGET_SHMEDIA)
2924 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2925 TARGET_SHMEDIA64 ? 64 : 32);
2929 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2931 unspec_caller_rtx_p (rtx pat)
/* Separate PAT into a base and a constant offset; only the base is
   examined.  */
2936 split_const (pat, &base, &offset);
2937 if (GET_CODE (base) == UNSPEC)
2939 if (XINT (base, 1) == UNSPEC_CALLER)
/* For any other UNSPEC, check each of its operands recursively.  */
2941 for (i = 0; i < XVECLEN (base, 0); i++)
2942 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2948 /* Indicate that INSN cannot be duplicated. This is true for an insn
2949 that generates a unique label. */
2951 sh_cannot_copy_insn_p (rtx insn)
/* The restriction is only checked after reload and with flag_pic set.  */
2955 if (!reload_completed || !flag_pic)
/* Only non-jump insns that are not asm statements are examined.  */
2958 if (!NONJUMP_INSN_P (insn))
2960 if (asm_noperands (insn) >= 0)
/* Look at the source of a single SET for an UNSPEC_CALLER.  */
2963 pat = PATTERN (insn);
2964 if (GET_CODE (pat) != SET)
2966 pat = SET_SRC (pat);
2968 if (unspec_caller_rtx_p (pat))
2974 /* Number of instructions used to make an arithmetic right shift by N. */
/* The table has one entry for each shift count from 0 to 31.  */
2975 static const char ashiftrt_insns[] =
2976 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2978 /* Description of a logical left or right shift, when expanded to a sequence
2980 Notice that one bit right shifts clobber the T bit. One bit left shifts
2981 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
/* Bit flags telling for which shift direction the sequence clobbers the
   T bit; they are tested against the clobbers_t field of the table
   entries.  */
2984 ASHL_CLOBBERS_T = 1 << 0,
2985 LSHR_CLOBBERS_T = 1 << 1
2988 struct ashl_lshr_sequence
/* Shift sequences indexed by the shift amount.  Each entry holds the insn
   count, the individual shift amounts and the T bit clobber flags.  In the
   entries shown the individual amounts add up to the index; a negative
   amount presumably stands for a shift in the opposite direction.  */
2995 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2997 { 0, { 0 }, 0 }, // 0
2998 { 1, { 1 }, LSHR_CLOBBERS_T },
3000 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3001 { 2, { 2, 2 }, 0 }, // 4
3002 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3003 { 3, { 2, 2, 2 }, 0 },
3004 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3005 { 1, { 8 }, 0 }, // 8
3006 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3008 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3009 { 3, { 8, 2, 2 }, 0 }, // 12
3010 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3011 { 3, { 8, -2, 8 }, 0 },
3012 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3013 { 1, { 16 }, 0 }, // 16
3014 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3015 { 2, { 16, 2 }, 0 },
3016 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3017 { 3, { 16, 2, 2 }, 0 }, // 20
3018 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3019 { 3, { 16, -2, 8 }, 0 },
3020 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3021 { 2, { 16, 8 }, 0 }, // 24
3022 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3023 { 3, { 16, 8, 2 }, 0 },
3024 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3025 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3026 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3027 { 3, { 16, -2, 16 }, 0 },
3029 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3030 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3031 However, the shift-and combiner code needs this entry here to be in
3032 terms of real shift insns. */
3033 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3036 /* Individual shift amounts for shift amounts < 16, up to three highmost
3037 bits might be clobbered. This is typically used when combined with some
3038 kind of sign or zero extension. */
/* Indexed by the shift amount like ashl_lshr_seq; from index 16 on the
   entries are the same as in that table.  */
3039 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3041 { 0, { 0 }, 0 }, // 0
3042 { 1, { 1 }, LSHR_CLOBBERS_T },
3044 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3045 { 2, { 2, 2 }, 0 }, // 4
3046 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3047 { 2, { 8, -2 }, 0 },
3048 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3049 { 1, { 8 }, 0 }, // 8
3050 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3052 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3053 { 3, { 8, 2, 2 }, 0 }, // 12
3054 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3055 { 2, { 16, -2 }, 0 },
3056 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3057 { 1, { 16 }, 0 }, // 16
3058 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3059 { 2, { 16, 2 }, 0 },
3060 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3061 { 3, { 16, 2, 2 }, 0 }, // 20
3062 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3063 { 3, { 16, -2, 8 }, 0 },
3064 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3065 { 2, { 16, 8 }, 0 }, // 24
3066 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3067 { 3, { 16, 8, 2 }, 0 },
3068 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3069 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3070 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3071 { 3, { 16, -2, 16 }, 0 },
3072 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3075 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3076 will clobber the T bit. */
/* SHIFT_AMOUNT must be a CONST_INT.  */
3078 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3080 gcc_assert (CONST_INT_P (shift_amount));
/* Only the low five bits of the shift amount are used.  */
3082 const int shift_amount_i = INTVAL (shift_amount) & 31;
3084 /* Special case for shift count of 31: use and-rotl sequence. */
3085 if (shift_amount_i == 31)
/* Otherwise look up the ASHL flag in the sequence table.  */
3088 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3089 & ASHL_CLOBBERS_T) != 0;
3092 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3093 instructions will clobber the T bit. */
/* SHIFT_AMOUNT must be a CONST_INT.  */
3095 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3097 gcc_assert (CONST_INT_P (shift_amount));
/* Only the low five bits of the shift amount are used.  */
3099 const int shift_amount_i = INTVAL (shift_amount) & 31;
3101 /* Special case for shift count of 31: use shll-movt sequence. */
3102 if (shift_amount_i == 31)
/* Otherwise look up the LSHR flag in the sequence table.  */
3105 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3106 & LSHR_CLOBBERS_T) != 0;
3109 /* Return true if it is potentially beneficial to use a dynamic shift
3110 instruction (shad / shar) instead of a combination of 1/2/8/16
3111 shift instructions for the specified shift count.
3112 If dynamic shifts are not available, always return false. */
/* COUNT must be a CONST_INT.  */
3114 sh_dynamicalize_shift_p (rtx count)
3116 gcc_assert (CONST_INT_P (count));
/* Only the low five bits of the shift count are used.  */
3118 const int shift_amount_i = INTVAL (count) & 31;
3121 /* For left and right shifts, there are shorter 2 insn sequences for
3122 shift amounts of 31. */
3123 if (shift_amount_i == 31)
3126 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
/* A dynamic shift is preferred when the constant shift sequence needs
   more than 1 + SH_DYNAMIC_SHIFT_COST insns.  */
3128 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
/* EXT_SHIFT_SIGNED (n) is nonzero exactly for n == 7 and n == 15.  */
3131 /* Assuming we have a value that has been sign-extended by at least one bit,
3132 can we use the ext_shift_amounts with the last shift turned to an
3133 arithmetic shift to shift it by N without data loss, and quicker than by
3135 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3137 /* Return the cost of a shift. */
/* X is the shift rtx; its second operand is the shift count.  */
3146 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
/* Wider than a word: only a DImode shift by the constant 1 is
   recognized.  */
3148 if (GET_MODE (x) == DImode
3149 && CONST_INT_P (XEXP (x, 1))
3150 && INTVAL (XEXP (x, 1)) == 1)
3153 /* Everything else is invalid, because there is no pattern for it. */
3156 /* If shift by a non constant, then this will be expensive. */
3157 if (!CONST_INT_P (XEXP (x, 1)))
3158 return SH_DYNAMIC_SHIFT_COST;
3160 /* Otherwise, return the true cost in instructions. Cope with out of range
3161 shift counts more or less arbitrarily. */
3162 value = INTVAL (XEXP (x, 1)) & 31;
3164 if (GET_CODE (x) == ASHIFTRT)
3166 int cost = ashiftrt_insns[value];
3167 /* If dynamic shifts are available and profitable in this case, then we
3168 put the constant in a reg and use shad. */
3169 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3170 cost = 1 + SH_DYNAMIC_SHIFT_COST;
/* Shifts other than ASHIFTRT use the insn count of the sequence table.  */
3174 return ashl_lshr_seq[value].insn_count;
3177 /* Return the cost of an AND/XOR/IOR operation. */
/* X is the operation and CODE its rtx code.  Costs other than the SHmedia
   ones are multiplied by cost_scale.  */
3179 and_xor_ior_costs (rtx x, int code)
3181 /* On SH1-4 we have only max. SImode operations.
3182 Double the cost for modes > SImode. */
3183 const int cost_scale = !TARGET_SHMEDIA
3184 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3187 /* A logical operation with two registers is a single cycle
3189 if (!CONST_INT_P (XEXP (x, 1)))
3190 return 1 * cost_scale;
3192 int i = INTVAL (XEXP (x, 1));
3196 if (satisfies_constraint_I10 (XEXP (x, 1))
3197 || satisfies_constraint_J16 (XEXP (x, 1)))
3200 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3203 /* These constants are single cycle extu.[bw] instructions. */
3204 if ((i == 0xff || i == 0xffff) && code == AND)
3205 return 1 * cost_scale;
3206 /* Constants that can be used in an instruction as an immediate are
3207 a single cycle, but this requires r0, so make it a little more
3209 if (CONST_OK_FOR_K08 (i))
3210 return 2 * cost_scale;
3211 /* Constants that can be loaded with a mov immediate need one more cycle.
3212 This case is probably unnecessary. */
3213 if (CONST_OK_FOR_I08 (i))
3214 return 2 * cost_scale;
3215 /* Any other constant requires an additional 2 cycle pc-relative load.
3216 This case is probably unnecessary. */
3217 return 3 * cost_scale;
3220 /* Return the cost of an addition or a subtraction. */
/* X is the PLUS or MINUS rtx.  */
3224 if (GET_MODE (x) == SImode)
3226 /* The addc or subc patterns will eventually become one or two
3227 instructions. Below are some costs for some of the patterns
3228 which combine would reject because the costs of the individual
3229 insns in the patterns are lower.
3231 FIXME: It would be much easier if we had something like insn cost
3232 attributes and the cost calculation machinery used those attributes
3233 in the first place. This would eliminate redundant recog-like C
3234 code to calculate costs of complex patterns. */
3235 rtx op0 = XEXP (x, 0);
3236 rtx op1 = XEXP (x, 1);
3238 if (GET_CODE (x) == PLUS)
/* (plus (and a 1) (plus ...)) or (plus (and a 1) (mult b 2)).  */
3240 if (GET_CODE (op0) == AND
3241 && XEXP (op0, 1) == const1_rtx
3242 && (GET_CODE (op1) == PLUS
3243 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
/* (plus (mult a 2) (lshiftrt b 31)).  */
3246 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3247 && GET_CODE (op1) == LSHIFTRT
3248 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3253 /* On SH1-4 we have only max. SImode operations.
3254 Double the cost for modes > SImode. */
3255 const int cost_scale = !TARGET_SHMEDIA
3256 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3259 /* Adding a register is a single cycle insn. */
3260 if (REG_P (XEXP (x, 1))
3261 || GET_CODE (XEXP (x, 1)) == SUBREG)
3262 return 1 * cost_scale;
3264 /* Likewise for small constants. */
3265 if (CONST_INT_P (XEXP (x, 1))
3266 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3267 return 1 * cost_scale;
/* The remaining cases are decided by the kind of the second operand.  */
3270 switch (GET_CODE (XEXP (x, 1)))
3275 return TARGET_SHMEDIA64 ? 5 : 3;
3278 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3280 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3282 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3290 /* Any other constant requires a 2 cycle pc-relative load plus an
3292 return 3 * cost_scale;
3295 /* Return the cost of a multiply. */
/* X is not used.  A non-negative sh_multcost is checked before anything
   else.  */
3297 multcosts (rtx x ATTRIBUTE_UNUSED)
3299 if (sh_multcost >= 0)
3302 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3303 accept constants. Ideally, we would use a cost of one or two and
3304 add the cost of the operand, but disregard the latter when inside loops
3305 and loop invariant code motion is still to follow.
3306 Using a multiply first and splitting it later if it's a loss
3307 doesn't work because of different sign / zero extension semantics
3308 of multiplies vs. shifts. */
3309 return optimize_size ? 2 : 3;
3313 /* We have a mul insn, so we can never take more than the mul and the
3314 read of the mac reg, but count more because of the latency and extra
3321 /* If we're aiming at small code, then just count the number of
3322 insns in a multiply call sequence. */
3326 /* Otherwise count all the insns in the routine we'd be calling too. */
3330 /* Compute a (partial) cost for rtx X. Return true if the complete
3331 cost has been computed, and false if subexpressions should be
3332 scanned. In either case, *TOTAL contains the cost result. */
3334 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3335 int *total, bool speed ATTRIBUTE_UNUSED)
3339 /* The lower-subreg pass decides whether to split multi-word regs
3340 into individual regs by looking at the cost for a SET of certain
3341 modes with the following patterns:
3343 (set (reg) (const_int 0))
3344 On machines that support vector-move operations a multi-word move
3345 is the same cost as individual reg move. On SH there is no
3346 vector-move, so we have to provide the correct cost in the number
3347 of move insns to load/store the reg of the mode in question. */
3349 if (register_operand (SET_DEST (x), VOIDmode)
3350 && (register_operand (SET_SRC (x), VOIDmode)
3351 || satisfies_constraint_Z (SET_SRC (x))))
3353 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3354 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3355 / mov_insn_size (mode, TARGET_SH2A));
3360 /* The cost of a mem access is mainly the cost of the address mode. */
3362 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3366 /* The cost of a sign or zero extend depends on whether the source is a
3367 reg or a mem. In case of a mem take the address into account. */
3369 if (REG_P (XEXP (x, 0)))
3371 *total = COSTS_N_INSNS (1);
3374 if (MEM_P (XEXP (x, 0)))
3376 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3377 GET_MODE (XEXP (x, 0)),
3378 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3384 if (REG_P (XEXP (x, 0)))
3386 *total = COSTS_N_INSNS (1);
3389 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3390 && (GET_MODE (XEXP (x, 0)) == QImode
3391 || GET_MODE (XEXP (x, 0)) == HImode))
3393 /* Handle SH2A's movu.b and movu.w insn. */
3394 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3395 GET_MODE (XEXP (x, 0)),
3396 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3401 /* mems for SFmode and DFmode can be inside a parallel due to
3402 the way the fpscr is handled. */
3404 for (int i = 0; i < XVECLEN (x, 0); i++)
3406 rtx xx = XVECEXP (x, 0, i);
3407 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3409 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3410 GET_MODE (XEXP (xx, 0)),
3411 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3414 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3416 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3417 GET_MODE (XEXP (xx, 1)),
3418 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3423 if (sh_1el_vec (x, VOIDmode))
3424 *total = outer_code != SET;
3425 else if (sh_rep_vec (x, VOIDmode))
3426 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3427 + (outer_code != SET));
3429 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3435 if (INTVAL (x) == 0)
3437 else if (outer_code == AND && and_operand ((x), DImode))
3439 else if ((outer_code == IOR || outer_code == XOR
3440 || outer_code == PLUS)
3441 && CONST_OK_FOR_I10 (INTVAL (x)))
3443 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3444 *total = COSTS_N_INSNS (outer_code != SET);
3445 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3446 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3447 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3448 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3450 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3453 if (CONST_OK_FOR_I08 (INTVAL (x)))
3455 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3456 && CONST_OK_FOR_K08 (INTVAL (x)))
3458 /* prepare_cmp_insn will force costly constants into registers before
3459 the cbranch[sd]i4 patterns can see them, so preserve potentially
3460 interesting ones not covered by I08 above. */
3461 else if (outer_code == COMPARE
3462 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3463 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3464 || INTVAL (x) == 0x7fffffff
3465 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3472 /* An and with a constant compared against zero is
3473 most likely going to be a TST #imm, R0 instruction.
3474 Notice that this does not catch the zero_extract variants from
3476 if (GET_CODE (XEXP (x, 0)) == AND
3477 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3487 /* This is most likely a clips.b or clips.w insn that is being made up
3490 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3491 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3492 && REG_P (XEXP (XEXP (x, 0), 0))
3493 && CONST_INT_P (XEXP (x, 1)))
3495 *total = COSTS_N_INSNS (1);
3504 if (TARGET_SHMEDIA64)
3505 *total = COSTS_N_INSNS (4);
3506 else if (TARGET_SHMEDIA32)
3507 *total = COSTS_N_INSNS (2);
3514 *total = COSTS_N_INSNS (4);
3515 /* prepare_cmp_insn will force costly constants into registers before
3516 the cbranchdi4 pattern can see them, so preserve potentially
3517 interesting ones. */
3518 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3525 /* FIXME: This looks broken. Only the last statement has any effect.
3526 Probably this could be folded with the PARALLEL case? */
3527 if (x == CONST0_RTX (GET_MODE (x)))
3529 else if (sh_1el_vec (x, VOIDmode))
3530 *total = outer_code != SET;
3531 if (sh_rep_vec (x, VOIDmode))
3532 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3533 + (outer_code != SET));
3534 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3539 *total = COSTS_N_INSNS (addsubcosts (x));
3545 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3549 *total = COSTS_N_INSNS (multcosts (x));
3554 /* div0s sign comparison. */
3555 if (GET_CODE (XEXP (x, 0)) == XOR
3556 && REG_P ((XEXP (XEXP (x, 0), 0)))
3557 && REG_P ((XEXP (XEXP (x, 0), 1)))
3558 && satisfies_constraint_Z (XEXP (x, 1)))
3560 *total = COSTS_N_INSNS (1);
3567 /* div0s sign comparison. */
3568 if (GET_CODE (XEXP (x, 0)) == XOR
3569 && REG_P ((XEXP (XEXP (x, 0), 0)))
3570 && REG_P ((XEXP (XEXP (x, 0), 1)))
3571 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3573 *total = COSTS_N_INSNS (1);
3576 /* Fall through to shiftcosts. */
3580 int cost = shiftcosts (x);
3583 *total = COSTS_N_INSNS (cost);
3591 *total = COSTS_N_INSNS (20);
3604 /* Determine the size of the fundamental move insn that will be used
3605 for the specified mode. */
3607 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3609 const int mode_sz = GET_MODE_SIZE (mode);
3611 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3612 || (TARGET_FMOVD && mode == DFmode))
3616 /* The max. available mode for actual move insns is SImode.
3617 Larger accesses will be split into multiple loads/stores. */
3618 const int max_mov_sz = GET_MODE_SIZE (SImode);
3619 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3623 /* Determine the maximum possible displacement for a move insn for the
3626 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3628 /* The 4 byte displacement move insns are the same as the 2 byte
3629 versions but take a 12 bit displacement. All we need to do is to
3630 scale the max. displacement value accordingly. */
3631 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3633 /* SH2A supports FPU move insns with 12 bit displacements.
3634 Other variants do not support any kind of displacements for
3636 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3640 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3641 const int mode_sz = GET_MODE_SIZE (mode);
3642 int r = 15 * mov_insn_sz * disp_scale;
3644 /* If the mov insn will be split into multiple loads/stores, the
3645 maximum possible displacement is a bit smaller. */
3646 if (mode_sz > mov_insn_sz)
3647 r -= mode_sz - mov_insn_sz;
3652 /* Determine the alignment mask for a move insn of the
3655 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3657 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3658 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3661 /* Return the displacement value of a displacement address. */
3663 sh_disp_addr_displacement (rtx x)
3665 gcc_assert (satisfies_constraint_Sdd (x));
3666 return INTVAL (XEXP (XEXP (x, 0), 1));
3669 /* Compute the cost of an address. */
3671 sh_address_cost (rtx x, enum machine_mode mode,
3672 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3674 /* 'GBR + 0'. Account one more because of R0 restriction. */
3675 if (REG_P (x) && REGNO (x) == GBR_REG)
3678 /* Simple reg, post-inc, pre-dec addressing. */
3679 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3682 /* 'reg + disp' addressing. */
3683 if (GET_CODE (x) == PLUS
3684 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3686 /* 'GBR + disp'. Account one more because of R0 restriction. */
3687 if (REGNO (XEXP (x, 0)) == GBR_REG
3688 && gbr_displacement (XEXP (x, 1), mode))
3691 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3696 /* The displacement would fit into a 2 byte move insn.
3697 HImode and QImode loads/stores with displacement put pressure on
3698 R0 which will most likely require another reg copy. Thus account
3699 a higher cost for that. */
3700 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3701 return (mode == HImode || mode == QImode) ? 2 : 1;
3703 /* The displacement would fit into a 4 byte move insn (SH2A). */
3705 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3708 /* The displacement is probably out of range and will require extra
3713 /* 'reg + reg' addressing. Account a slightly higher cost because of
3714 increased pressure on R0. */
3715 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3716 && ! TARGET_SHMEDIA)
3719 /* Not sure what it is - probably expensive. */
3723 /* Code to expand a shift. */
3725 gen_ashift (int type, int n, rtx reg)
3729 /* Negative values here come from the shift_amounts array. */
3739 n_rtx = GEN_INT (n);
3740 gcc_assert (satisfies_constraint_P27 (n_rtx));
3745 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3749 emit_insn (gen_shlr (reg, reg));
3751 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3754 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3761 /* Code to expand a HImode shift. */
3763 gen_ashift_hi (int type, int n, rtx reg)
3765 /* Negative values here come from the shift_amounts array. */
3779 /* We don't have HImode right shift operations because using the
3780 ordinary 32 bit shift instructions for that doesn't generate proper
3781 zero/sign extension.
3782 gen_ashift_hi is only called in contexts where we know that the
3783 sign extension works out correctly. */
3786 if (GET_CODE (reg) == SUBREG)
3788 offset = SUBREG_BYTE (reg);
3789 reg = SUBREG_REG (reg);
3791 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3795 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3800 /* Output RTL to split a constant shift into its component SH constant
3801 shift instructions. */
3803 gen_shifty_op (int code, rtx *operands)
3805 int value = INTVAL (operands[2]);
3808 /* Truncate the shift count in case it is out of bounds. */
3813 if (code == LSHIFTRT)
3815 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3816 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3819 else if (code == ASHIFT)
3821 /* There is a two instruction sequence for 31 bit left shifts,
3822 but it requires r0. */
3823 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3825 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3826 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3831 else if (value == 0)
3833 /* This can happen even when optimizing, if there were subregs before
3834 reload. Don't output a nop here, as this is never optimized away;
3835 use a no-op move instead. */
3836 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3840 max = ashl_lshr_seq[value].insn_count;
3841 for (i = 0; i < max; i++)
3842 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3845 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3848 gen_shifty_hi_op (int code, rtx *operands)
3850 int value = INTVAL (operands[2]);
3852 void (*gen_fun) (int, int, rtx);
3854 /* This operation is used by and_shl for SImode values with a few
3855 high bits known to be cleared. */
3859 emit_insn (gen_nop ());
3863 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3866 max = ext_ashl_lshr_seq[value].insn_count;
3867 for (i = 0; i < max; i++)
3868 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3871 /* When shifting right, emit the shifts in reverse order, so that
3872 solitary negative values come first. */
3873 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3874 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3877 /* Output RTL for an arithmetic right shift.
3878 ??? Rewrite to use super-optimizer sequences. */
3880 expand_ashiftrt (rtx *operands)
3886 if (TARGET_DYNSHIFT)
3888 if (!CONST_INT_P (operands[2]))
3890 rtx count = copy_to_mode_reg (SImode, operands[2]);
3891 emit_insn (gen_negsi2 (count, count));
3892 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3895 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3896 > 1 + SH_DYNAMIC_SHIFT_COST)
3899 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3900 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3904 if (!CONST_INT_P (operands[2]))
3907 value = INTVAL (operands[2]) & 31;
3911 /* If we are called from abs expansion, arrange things so that we
3912 can use a single MT instruction that doesn't clobber the source,
3913 if LICM can hoist out the load of the constant zero. */
3914 if (currently_expanding_to_rtl)
3916 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3918 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3921 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3924 else if (value >= 16 && value <= 19)
3926 wrk = gen_reg_rtx (SImode);
3927 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3930 gen_ashift (ASHIFTRT, 1, wrk);
3931 emit_move_insn (operands[0], wrk);
3934 /* Expand a short sequence inline, longer call a magic routine. */
3935 else if (value <= 5)
3937 wrk = gen_reg_rtx (SImode);
3938 emit_move_insn (wrk, operands[1]);
3940 gen_ashift (ASHIFTRT, 1, wrk);
3941 emit_move_insn (operands[0], wrk);
3945 wrk = gen_reg_rtx (Pmode);
3947 /* Load the value into an arg reg and call a helper. */
3948 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3949 sprintf (func, "__ashiftrt_r4_%d", value);
3950 function_symbol (wrk, func, SFUNC_STATIC);
3951 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3952 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3956 /* Try to find a good way to implement the combiner pattern
3957 [(set (match_operand:SI 0 "register_operand" "r")
3958 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3959 (match_operand:SI 2 "const_int_operand" "n"))
3960 (match_operand:SI 3 "const_int_operand" "n"))) .
3961 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3962 return 0 for simple right / left or left/right shift combination.
3963 return 1 for a combination of shifts with zero_extend.
3964 return 2 for a combination of shifts with an AND that needs r0.
3965 return 3 for a combination of shifts with an AND that needs an extra
3966 scratch register, when the three highmost bits of the AND mask are clear.
3967 return 4 for a combination of shifts with an AND that needs an extra
3968 scratch register, when any of the three highmost bits of the AND mask
3970 If ATTRP is set, store an initial right shift width in ATTRP[0],
3971 and the instruction length in ATTRP[1] . These values are not valid
3973 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3974 shift_amounts for the last shift value that is to be used before the
3977 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3979 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3980 int left = INTVAL (left_rtx), right;
3982 int cost, best_cost = 10000;
3983 int best_right = 0, best_len = 0;
3987 if (left < 0 || left > 31)
3989 if (CONST_INT_P (mask_rtx))
3990 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3992 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3993 /* Can this be expressed as a right shift / left shift pair? */
3994 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3995 right = exact_log2 (lsb);
3996 mask2 = ~(mask + lsb - 1);
3997 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3998 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
4000 best_cost = ashl_lshr_seq[right].insn_count
4001 + ashl_lshr_seq[right + left].insn_count;
4002 /* mask has no trailing zeroes <==> ! right */
4003 else if (! right && mask2 == ~(lsb2 - 1))
4005 int late_right = exact_log2 (lsb2);
4006 best_cost = ashl_lshr_seq[left + late_right].insn_count
4007 + ashl_lshr_seq[late_right].insn_count;
4009 /* Try to use zero extend. */
4010 if (mask2 == ~(lsb2 - 1))
4014 for (width = 8; width <= 16; width += 8)
4016 /* Can we zero-extend right away? */
4017 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4019 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4020 + ext_ashl_lshr_seq[left + right].insn_count;
4021 if (cost < best_cost)
4032 /* ??? Could try to put zero extend into initial right shift,
4033 or even shift a bit left before the right shift. */
4034 /* Determine value of first part of left shift, to get to the
4035 zero extend cut-off point. */
4036 first = width - exact_log2 (lsb2) + right;
4037 if (first >= 0 && right + left - first >= 0)
4039 cost = ext_ashl_lshr_seq[right].insn_count
4040 + ext_ashl_lshr_seq[first].insn_count + 1
4041 + ext_ashl_lshr_seq[right + left - first].insn_count;
4043 if (cost < best_cost)
4055 /* Try to use r0 AND pattern */
4056 for (i = 0; i <= 2; i++)
4060 if (! CONST_OK_FOR_K08 (mask >> i))
4062 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4063 if (cost < best_cost)
4068 best_len = cost - 1;
4071 /* Try to use a scratch register to hold the AND operand. */
4072 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4073 for (i = 0; i <= 2; i++)
4077 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4080 : ashl_lshr_seq)[left + i].insn_count;
4081 if (cost < best_cost)
4086 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4092 attrp[0] = best_right;
4093 attrp[1] = best_len;
4098 /* This is used in length attributes of the unnamed instructions
4099 corresponding to shl_and_kind return values of 1 and 2. */
4101 shl_and_length (rtx insn)
4103 rtx set_src, left_rtx, mask_rtx;
4106 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4107 left_rtx = XEXP (XEXP (set_src, 0), 1);
4108 mask_rtx = XEXP (set_src, 1);
4109 shl_and_kind (left_rtx, mask_rtx, attributes);
4110 return attributes[1];
4113 /* This is used in length attribute of the and_shl_scratch instruction. */
4115 shl_and_scr_length (rtx insn)
4117 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4118 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4119 rtx op = XEXP (set_src, 0);
4120 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4121 op = XEXP (XEXP (op, 0), 0);
4122 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4125 /* Generate rtl for instructions for which shl_and_kind advised a particular
4126 method of generating them, i.e. returned zero. */
4128 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4131 unsigned HOST_WIDE_INT mask;
4132 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4133 int right, total_shift;
4134 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4136 right = attributes[0];
4137 total_shift = INTVAL (left_rtx) + right;
4138 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4145 int first = attributes[2];
4150 emit_insn ((mask << right) <= 0xff
4151 ? gen_zero_extendqisi2 (dest,
4152 gen_lowpart (QImode, source))
4153 : gen_zero_extendhisi2 (dest,
4154 gen_lowpart (HImode, source)));
4158 emit_insn (gen_movsi (dest, source));
4162 operands[2] = GEN_INT (right);
4163 gen_shifty_hi_op (LSHIFTRT, operands);
4167 operands[2] = GEN_INT (first);
4168 gen_shifty_hi_op (ASHIFT, operands);
4169 total_shift -= first;
4173 emit_insn (mask <= 0xff
4174 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4175 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4176 if (total_shift > 0)
4178 operands[2] = GEN_INT (total_shift);
4179 gen_shifty_hi_op (ASHIFT, operands);
4184 shift_gen_fun = gen_shifty_op;
4186 /* If the topmost bit that matters is set, set the topmost bits
4187 that don't matter. This way, we might be able to get a shorter
4189 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4190 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4192 /* Don't expand fine-grained when combining, because that will
4193 make the pattern fail. */
4194 if (currently_expanding_to_rtl
4195 || reload_in_progress || reload_completed)
4199 /* Cases 3 and 4 should be handled by this split
4200 only while combining */
4201 gcc_assert (kind <= 2);
4204 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4207 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4212 operands[2] = GEN_INT (total_shift);
4213 shift_gen_fun (ASHIFT, operands);
4220 if (kind != 4 && total_shift < 16)
4222 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4224 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4228 emit_insn (gen_and_shl_scratch (dest, source,
4231 GEN_INT (total_shift + neg),
4233 emit_insn (gen_movsi (dest, dest));
4240 /* Try to find a good way to implement the combiner pattern
4241 [(set (match_operand:SI 0 "register_operand" "=r")
4242 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4243 (match_operand:SI 2 "const_int_operand" "n")
4244 (match_operand:SI 3 "const_int_operand" "n")
4246 (clobber (reg:SI T_REG))]
4247 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4248 return 0 for simple left / right shift combination.
4249 return 1 for left shift / 8 bit sign extend / left shift.
4250 return 2 for left shift / 16 bit sign extend / left shift.
4251 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4252 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4253 return 5 for left shift / 16 bit sign extend / right shift
4254 return 6 for < 8 bit sign extend / left shift.
4255 return 7 for < 8 bit sign extend / left shift / single right shift.
4256 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4258 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4260 int left, size, insize, ext;
4261 int cost = 0, best_cost;
4264 left = INTVAL (left_rtx);
4265 size = INTVAL (size_rtx);
4266 insize = size - left;
4267 gcc_assert (insize > 0);
4268 /* Default to left / right shift. */
4270 best_cost = ashl_lshr_seq[32 - insize].insn_count
4271 + ashl_lshr_seq[32 - size].insn_count;
4274 /* 16 bit shift / sign extend / 16 bit shift */
4275 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4276 + ashl_lshr_seq[16 - size].insn_count;
4277 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4278 below, by alternative 3 or something even better. */
4279 if (cost < best_cost)
4285 /* Try a plain sign extend between two shifts. */
4286 for (ext = 16; ext >= insize; ext -= 8)
4290 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4291 + ashl_lshr_seq[size - ext].insn_count;
4292 if (cost < best_cost)
4294 kind = ext / (unsigned) 8;
4298 /* Check if we can do a sloppy shift with a final signed shift
4299 restoring the sign. */
4300 if (EXT_SHIFT_SIGNED (size - ext))
4301 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4302 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4303 /* If not, maybe it's still cheaper to do the second shift sloppy,
4304 and do a final sign extend? */
4305 else if (size <= 16)
4306 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4307 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4311 if (cost < best_cost)
4313 kind = ext / (unsigned) 8 + 2;
4317 /* Check if we can sign extend in r0 */
4320 cost = 3 + ashl_lshr_seq[left].insn_count;
4321 if (cost < best_cost)
4326 /* Try the same with a final signed shift. */
4329 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4330 if (cost < best_cost)
4337 if (TARGET_DYNSHIFT)
4339 /* Try to use a dynamic shift. */
4340 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4341 if (cost < best_cost)
4352 /* Function to be used in the length attribute of the instructions
4353 implementing this pattern. */
4355 shl_sext_length (rtx insn)
4357 rtx set_src, left_rtx, size_rtx;
4360 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4361 left_rtx = XEXP (XEXP (set_src, 0), 1);
4362 size_rtx = XEXP (set_src, 1);
4363 shl_sext_kind (left_rtx, size_rtx, &cost);
4367 /* Generate rtl for this pattern */
4369 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4372 int left, size, insize, cost;
4375 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4376 left = INTVAL (left_rtx);
4377 size = INTVAL (size_rtx);
4378 insize = size - left;
4386 int ext = kind & 1 ? 8 : 16;
4387 int shift2 = size - ext;
4389 /* Don't expand fine-grained when combining, because that will
4390 make the pattern fail. */
4391 if (! currently_expanding_to_rtl
4392 && ! reload_in_progress && ! reload_completed)
4394 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4395 emit_insn (gen_movsi (dest, source));
4399 emit_insn (gen_movsi (dest, source));
4403 operands[2] = GEN_INT (ext - insize);
4404 gen_shifty_hi_op (ASHIFT, operands);
4407 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4408 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4413 operands[2] = GEN_INT (shift2);
4414 gen_shifty_op (ASHIFT, operands);
4421 if (EXT_SHIFT_SIGNED (shift2))
4423 operands[2] = GEN_INT (shift2 + 1);
4424 gen_shifty_op (ASHIFT, operands);
4425 operands[2] = const1_rtx;
4426 gen_shifty_op (ASHIFTRT, operands);
4429 operands[2] = GEN_INT (shift2);
4430 gen_shifty_hi_op (ASHIFT, operands);
4434 operands[2] = GEN_INT (-shift2);
4435 gen_shifty_hi_op (LSHIFTRT, operands);
4437 emit_insn (size <= 8
4438 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4439 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4446 if (! currently_expanding_to_rtl
4447 && ! reload_in_progress && ! reload_completed)
4448 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4452 operands[2] = GEN_INT (16 - insize);
4453 gen_shifty_hi_op (ASHIFT, operands);
4454 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4456 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4458 gen_ashift (ASHIFTRT, 1, dest);
4463 /* Don't expand fine-grained when combining, because that will
4464 make the pattern fail. */
4465 if (! currently_expanding_to_rtl
4466 && ! reload_in_progress && ! reload_completed)
4468 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4469 emit_insn (gen_movsi (dest, source));
4472 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4473 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4474 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4476 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4477 gen_shifty_op (ASHIFT, operands);
4479 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4487 /* Prefix a symbol_ref name with "datalabel". */
4489 gen_datalabel_ref (rtx sym)
4493 if (GET_CODE (sym) == LABEL_REF)
4494 return gen_rtx_CONST (GET_MODE (sym),
4495 gen_rtx_UNSPEC (GET_MODE (sym),
4499 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4501 str = XSTR (sym, 0);
4502 /* Share all SYMBOL_REF strings with the same value - that is important
4504 str = IDENTIFIER_POINTER (get_identifier (str));
4505 XSTR (sym, 0) = str;
4511 static alloc_pool label_ref_list_pool;
4513 typedef struct label_ref_list_d
4515 rtx_code_label *label;
4516 struct label_ref_list_d *next;
4517 } *label_ref_list_t;
4519 /* The SH cannot load a large constant into a register, constants have to
4520 come from a pc relative load. The reference of a pc relative load
4521 instruction must be less than 1k in front of the instruction. This
4522 means that we often have to dump a constant inside a function, and
4523 generate code to branch around it.
4525 It is important to minimize this, since the branches will slow things
4526 down and make things bigger.
4528 Worst case code looks like:
4546 We fix this by performing a scan before scheduling, which notices which
4547 instructions need to have their operands fetched from the constant table
4548 and builds the table.
4552 scan, find an instruction which needs a pcrel move. Look forward, find the
4553 last barrier which is within MAX_COUNT bytes of the requirement.
4554 If there isn't one, make one. Process all the instructions between
4555 the find and the barrier.
4557 In the above example, we can tell that L3 is within 1k of L1, so
4558 the first move can be shrunk from the 3 insn+constant sequence into
4559 just 1 insn, and the constant moved to L3 to make:
4570 Then the second move becomes the target for the shortening process. */
4574 rtx value; /* Value in table. */
4575 rtx_code_label *label; /* Label of value. */
4576 label_ref_list_t wend; /* End of window. */
4577 enum machine_mode mode; /* Mode of value. */
4579 /* True if this constant is accessed as part of a post-increment
4580 sequence. Note that HImode constants are never accessed in this way. */
4581 bool part_of_sequence_p;
4584 /* The maximum number of constants that can fit into one pool, since
4585 constants in the range 0..510 are at least 2 bytes long, and in the
4586 range from there to 1018 at least 4 bytes. */
4588 #define MAX_POOL_SIZE 372
4589 static pool_node pool_vector[MAX_POOL_SIZE];
4590 static int pool_size;
4591 static rtx_code_label *pool_window_label;
4592 static int pool_window_last;
4594 static int max_labelno_before_reorg;
4596 /* ??? If we need a constant in HImode which is the truncated value of a
4597 constant we need in SImode, we could combine the two entries thus saving
4598 two bytes. Is this common enough to be worth the effort of implementing
4601 /* ??? This stuff should be done at the same time that we shorten branches.
4602 As it is now, we must assume that all branches are the maximum size, and
4603 this causes us to almost always output constant pools sooner than
4606 /* Add a constant to the pool and return its label. */
4607 static rtx_code_label *
4608 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4611 rtx_code_label *lab, *new_rtx;
4612 label_ref_list_t ref, newref;
4614 /* First see if we've already got it. */
4615 for (i = 0; i < pool_size; i++)
4617 if (x->code == pool_vector[i].value->code
4618 && mode == pool_vector[i].mode)
4620 if (x->code == CODE_LABEL)
4622 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4625 if (rtx_equal_p (x, pool_vector[i].value))
4630 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4632 new_rtx = gen_label_rtx ();
4633 LABEL_REFS (new_rtx) = pool_vector[i].label;
4634 pool_vector[i].label = lab = new_rtx;
4636 if (lab && pool_window_label)
4638 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4639 newref->label = pool_window_label;
4640 ref = pool_vector[pool_window_last].wend;
4642 pool_vector[pool_window_last].wend = newref;
4645 pool_window_label = new_rtx;
4646 pool_window_last = i;
4652 /* Need a new one. */
4653 pool_vector[pool_size].value = x;
4654 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4657 pool_vector[pool_size - 1].part_of_sequence_p = true;
4660 lab = gen_label_rtx ();
4661 pool_vector[pool_size].mode = mode;
4662 pool_vector[pool_size].label = lab;
4663 pool_vector[pool_size].wend = NULL;
4664 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4665 if (lab && pool_window_label)
4667 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4668 newref->label = pool_window_label;
4669 ref = pool_vector[pool_window_last].wend;
4671 pool_vector[pool_window_last].wend = newref;
4674 pool_window_label = lab;
4675 pool_window_last = pool_size;
4680 /* Output the literal table. START, if nonzero, is the first instruction
4681 this table is needed for, and also indicates that there is at least one
4682 casesi_worker_2 instruction; We have to emit the operand3 labels from
4683 these insns at a 4-byte aligned position. BARRIER is the barrier
4684 after which we are to place the table. */
4686 dump_table (rtx_insn *start, rtx_insn *barrier)
4688 rtx_insn *scan = barrier;
4690 bool need_align = true;
4692 label_ref_list_t ref;
4693 bool have_df = false;
4695 /* Do two passes, first time dump out the HI sized constants. */
4697 for (i = 0; i < pool_size; i++)
4699 pool_node *p = &pool_vector[i];
4701 if (p->mode == HImode)
4705 scan = emit_insn_after (gen_align_2 (), scan);
4708 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4709 scan = emit_label_after (lab, scan);
4710 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4712 for (ref = p->wend; ref; ref = ref->next)
4715 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4718 else if (p->mode == DFmode)
4726 scan = emit_insn_after (gen_align_4 (), scan);
4728 for (; start != barrier; start = NEXT_INSN (start))
4729 if (NONJUMP_INSN_P (start)
4730 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4732 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4733 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4735 scan = emit_label_after (lab, scan);
4738 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4740 rtx align_insn = NULL_RTX;
4742 scan = emit_label_after (gen_label_rtx (), scan);
4743 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4746 for (i = 0; i < pool_size; i++)
4748 pool_node *p = &pool_vector[i];
4756 if (align_insn && !p->part_of_sequence_p)
4758 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4759 emit_label_before (lab, align_insn);
4760 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4762 for (ref = p->wend; ref; ref = ref->next)
4765 emit_insn_before (gen_consttable_window_end (lab),
4768 delete_insn (align_insn);
4769 align_insn = NULL_RTX;
4774 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4775 scan = emit_label_after (lab, scan);
4776 scan = emit_insn_after (gen_consttable_4 (p->value,
4778 need_align = ! need_align;
4784 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4789 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4790 scan = emit_label_after (lab, scan);
4791 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4798 if (p->mode != HImode)
4800 for (ref = p->wend; ref; ref = ref->next)
4803 scan = emit_insn_after (gen_consttable_window_end (lab),
4812 for (i = 0; i < pool_size; i++)
4814 pool_node *p = &pool_vector[i];
4825 scan = emit_label_after (gen_label_rtx (), scan);
4826 scan = emit_insn_after (gen_align_4 (), scan);
4828 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4829 scan = emit_label_after (lab, scan);
4830 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4838 scan = emit_label_after (gen_label_rtx (), scan);
4839 scan = emit_insn_after (gen_align_4 (), scan);
4841 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4842 scan = emit_label_after (lab, scan);
4843 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4850 if (p->mode != HImode)
4852 for (ref = p->wend; ref; ref = ref->next)
4855 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4860 scan = emit_insn_after (gen_consttable_end (), scan);
4861 scan = emit_barrier_after (scan);
4863 pool_window_label = NULL;
4864 pool_window_last = 0;
4867 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4869 /* Nonzero if the insn is a move instruction which needs to be fixed.
   INSN is the insn to examine.  Only insns satisfying NONJUMP_INSN_P are
   considered, and for a PARALLEL pattern only element 0 is inspected.  */
4871 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4872 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4873 need to fix it if the input value is CONST_OK_FOR_I08. */
4875 broken_move (rtx_insn *insn)
4877 if (NONJUMP_INSN_P (insn))
4879 rtx pat = PATTERN (insn);
4880 if (GET_CODE (pat) == PARALLEL)
4881 pat = XVECEXP (pat, 0, 0);
/* Candidates are SETs to a non-QImode destination whose source is a
   constant, an UNSPECV_SP_SWITCH_B unspec_volatile, or a mova of a CONST.  */
4882 if (GET_CODE (pat) == SET
4883 /* We can load any 8-bit value if we don't care what the high
4884 order bits end up as. */
4885 && GET_MODE (SET_DEST (pat)) != QImode
4886 && (CONSTANT_P (SET_SRC (pat))
4887 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4888 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4889 /* Match mova_const. */
4890 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4891 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4892 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
/* NOTE(review): the lines that open the following sub-conditions are not
   visible in this listing.  The visible tests involve 0.0 / 1.0
   CONST_DOUBLE sources going to an FP register, SImode sources satisfying
   I20 or I28, and sources satisfying I08; confirm the exact nesting and
   negations against the full source.  */
4894 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4895 && (fp_zero_operand (SET_SRC (pat))
4896 || fp_one_operand (SET_SRC (pat)))
4897 /* In general we don't know the current setting of fpscr, so
   disable fldi.
4899 There is an exception if this was a register-register move
4900 before reload - and hence it was ascertained that we have
4901 single precision setting - and in a post-reload optimization
4902 we changed this to do a constant load. In that case
4903 we don't have an r0 clobber, hence we must use fldi. */
4905 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4907 && REG_P (SET_DEST (pat))
4908 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4910 && GET_MODE (SET_DEST (pat)) == SImode
4911 && (satisfies_constraint_I20 (SET_SRC (pat))
4912 || satisfies_constraint_I28 (SET_SRC (pat))))
/* A source that satisfies constraint I08 does not make the move broken.  */
4913 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4920 /* Return true if the specified insn is a mova insn.
   INSN qualifies when it is a non-jump insn whose whole pattern is a SET
   from an UNSPEC_MOVA unspec and the unspec's first vector element
   (MOVA_LABELREF) is a LABEL_REF.  */
4922 mova_p (rtx_insn *insn)
4924 return (NONJUMP_INSN_P (insn)
4925 && GET_CODE (PATTERN (insn)) == SET
4926 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4927 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4928 /* Don't match mova_const. */
4929 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4932 /* Fix up a mova from a switch that went out of range.
   MOVA is the mova insn to rewrite.  Two rewrites are visible below:
   (1) the UNSPEC_MOVA source is replaced by the bare label reference, or
   (2) the casesi_worker_1 that follows is turned into a casesi_worker_2
   using a fresh label, and the mova source becomes a CONST wrapping an
   UNSPEC_SYMOFF of (original target, fresh label).
   NOTE(review): the condition that selects between the two rewrites is not
   visible in this listing.  */
4934 fixup_mova (rtx_insn *mova)
/* Mark the label referenced by the mova with QImode.  */
4936 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
/* Rewrite (1): use the label reference itself as the source.  The insn code
   is reset to -1, presumably so the changed pattern is re-recognized.  */
4939 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4940 INSN_CODE (mova) = -1;
/* Rewrite (2) starts here.  */
4945 rtx lab = gen_label_rtx ();
4946 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
/* Step forward insn by insn until a casesi_worker_1 is found; the visible
   tests show that labels and jumps are not expected on the way.  */
4950 worker = NEXT_INSN (worker);
4952 && !LABEL_P (worker)
4953 && !JUMP_P (worker));
4954 } while (NOTE_P (worker)
4955 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
/* Rebuild the worker's pattern as a casesi_worker_2 from pieces of the old
   PARALLEL plus the fresh label LAB.  */
4956 wpat = PATTERN (worker);
4957 wpat0 = XVECEXP (wpat, 0, 0);
4958 wpat1 = XVECEXP (wpat, 0, 1);
4959 wsrc = SET_SRC (wpat0);
4960 PATTERN (worker) = (gen_casesi_worker_2
4961 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4962 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4964 INSN_CODE (worker) = -1;
/* Make the mova load (const (unspec [target, label_ref LAB] UNSPEC_SYMOFF)).  */
4965 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4966 base = gen_rtx_LABEL_REF (Pmode, lab);
4967 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4968 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4969 INSN_CODE (mova) = -1;
4973 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4974 *num_mova, and check if the new mova is not nested within the first one.
4975 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4976 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4978 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4980 int n_addr = 0; /* Initialization to shut up spurious warning. */
4981 int f_target, n_target = 0; /* Likewise. */
4985 /* If NEW_MOVA has no address yet, it will be handled later. */
4986 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
/* N_ADDR is the address of NEW_MOVA, N_TARGET that of the label it
   references.  */
4989 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4990 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
/* A label located before the mova, or more than 1022 address units past
   it, is treated as out of reach.  */
4991 if (n_addr > n_target || n_addr + 1022 < n_target)
4993 /* Change the mova into a load.
4994 broken_move will then return true for it. */
4995 fixup_mova (new_mova);
5001 *first_mova = new_mova;
/* F_TARGET is the address of the label referenced by *FIRST_MOVA.  */
5006 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
/* Compare the mova-to-label distance of *FIRST_MOVA with that of NEW_MOVA.
   NOTE(review): the branch structure is not fully visible here; presumably
   the mova with the larger distance is the one fixed up.  */
5011 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5012 > n_target - n_addr)
5014 fixup_mova (*first_mova);
5019 fixup_mova (new_mova);
5024 /* Find the last barrier from insn FROM which is close enough to hold the
5025 constant pool. If we can't find one, then create one near the end of the range. */
5028 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5037 int leading_mova = num_mova;
5038 rtx_insn *barrier_before_mova = NULL;
5039 rtx_insn *found_barrier = NULL;
5040 rtx_insn *good_barrier = NULL;
5043 rtx_insn *orig = from;
5044 rtx last_got = NULL_RTX;
5045 rtx_insn *last_symoff = NULL;
5047 /* For HImode: range is 510, add 4 because pc counts from address of
5048 second instruction after this one, subtract 2 for the jump instruction
5049 that we may need to emit before the table, subtract 2 for the instruction
5050 that fills the jump delay slot (in very rare cases, reorg will take an
5051 instruction from after the constant pool or will leave the delay slot
5052 empty). This gives 510.
5053 For SImode: range is 1020, add 4 because pc counts from address of
5054 second instruction after this one, subtract 2 in case pc is 2 byte
5055 aligned, subtract 2 for the jump instruction that we may need to emit
5056 before the table, subtract 2 for the instruction that fills the jump
5057 delay slot. This gives 1018. */
5059 /* The branch will always be shortened now that the reference address for
5060 forward branches is the successor address, thus we need no longer make
5061 adjustments to the [sh]i_limit for -O0. */
5066 while (from && count_si < si_limit && count_hi < hi_limit)
5068 int inc = get_attr_length (from);
5071 /* If this is a label that existed at the time of the compute_alignments
5072 call, determine the alignment. N.B. When find_barrier recurses for
5073 an out-of-reach mova, we might see labels at the start of previously
5074 inserted constant tables. */
5076 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5079 new_align = 1 << label_to_alignment (from);
5080 else if (BARRIER_P (prev_nonnote_insn (from)))
5081 new_align = 1 << barrier_align (from);
5086 /* In case we are scanning a constant table because of recursion, check
5087 for explicit alignments. If the table is long, we might be forced
5088 to emit the new table in front of it; the length of the alignment
5089 might be the last straw. */
5090 else if (NONJUMP_INSN_P (from)
5091 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5092 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5093 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5094 /* When we find the end of a constant table, paste the new constant
5095 at the end. That is better than putting it in front because
5096 this way, we don't need extra alignment for adding a 4-byte-aligned
5097 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5098 else if (NONJUMP_INSN_P (from)
5099 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5100 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5103 if (BARRIER_P (from))
5107 found_barrier = from;
5109 /* If we are at the end of the function, or in front of an alignment
5110 instruction, we need not insert an extra alignment. We prefer
5111 this kind of barrier. */
5112 if (barrier_align (from) > 2)
5113 good_barrier = from;
5115 /* If we are at the end of a hot/cold block, dump the constants here. */
5117 next = NEXT_INSN (from);
5120 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5124 if (broken_move (from))
5127 enum machine_mode mode;
5129 pat = PATTERN (from);
5130 if (GET_CODE (pat) == PARALLEL)
5131 pat = XVECEXP (pat, 0, 0);
5132 src = SET_SRC (pat);
5133 dst = SET_DEST (pat);
5134 mode = GET_MODE (dst);
5136 /* GOT pcrelat setting comes in pair of
5139 instructions. (plus add r0,r12).
5140 Remember if we see one without the other. */
5141 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5142 last_got = last_got ? NULL_RTX : from;
5143 else if (PIC_ADDR_P (src))
5144 last_got = last_got ? NULL_RTX : from;
5146 /* We must explicitly check the mode, because sometimes the
5147 front end will generate code to load unsigned constants into
5148 HImode targets without properly sign extending them. */
5150 || (mode == SImode && satisfies_constraint_I16 (src)
5151 && REGNO (dst) != FPUL_REG))
5154 /* We put the short constants before the long constants, so
5155 we must count the length of short constants in the range
5156 for the long constants. */
5157 /* ??? This isn't optimal, but is easy to do. */
5162 /* We dump DF/DI constants before SF/SI ones, because
5163 the limit is the same, but the alignment requirements
5164 are higher. We may waste up to 4 additional bytes
5165 for alignment, and the DF/DI constant may have
5166 another SF/SI constant placed before it. */
5167 if (TARGET_SHCOMPACT
5169 && (mode == DFmode || mode == DImode))
5174 while (si_align > 2 && found_si + si_align - 2 > count_si)
5176 if (found_si > count_si)
5177 count_si = found_si;
5178 found_si += GET_MODE_SIZE (mode);
5180 si_limit -= GET_MODE_SIZE (mode);
5186 switch (untangle_mova (&num_mova, &mova, from))
5191 rtx src = SET_SRC (PATTERN (from));
5192 if (GET_CODE (src) == CONST
5193 && GET_CODE (XEXP (src, 0)) == UNSPEC
5194 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5198 case 0: return find_barrier (0, 0, mova);
5203 = good_barrier ? good_barrier : found_barrier;
5207 if (found_si > count_si)
5208 count_si = found_si;
5210 else if (JUMP_TABLE_DATA_P (from)
5211 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5213 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5215 && (prev_nonnote_insn (from)
5216 == XEXP (MOVA_LABELREF (mova), 0))))
5218 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5220 /* We have just passed the barrier in front of the
5221 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5222 the ADDR_DIFF_VEC is accessed as data, just like our pool
5223 constants, this is a good opportunity to accommodate what
5224 we have gathered so far.
5225 If we waited any longer, we could end up at a barrier in
5226 front of code, which gives worse cache usage for separated
5227 instruction / data caches. */
5228 good_barrier = found_barrier;
5233 rtx body = PATTERN (from);
5234 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5237 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5238 else if (JUMP_P (from)
5243 /* There is a possibility that a bf is transformed into a bf/s by the
5244 delay slot scheduler. */
5246 && get_attr_type (from) == TYPE_CBRANCH
5247 && ! sequence_insn_p (from))
5253 if (new_align > si_align)
5255 si_limit -= (count_si - 1) & (new_align - si_align);
5256 si_align = new_align;
5258 count_si = (count_si + new_align - 1) & -new_align;
5263 if (new_align > hi_align)
5265 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5266 hi_align = new_align;
5268 count_hi = (count_hi + new_align - 1) & -new_align;
5270 from = NEXT_INSN (from);
5277 /* Try as we might, the leading mova is out of range. Change
5278 it into a load (which will become a pcload) and retry. */
5280 return find_barrier (0, 0, mova);
5284 /* Insert the constant pool table before the mova instruction,
5285 to prevent the mova label reference from going out of range. */
5287 good_barrier = found_barrier = barrier_before_mova;
5293 if (good_barrier && next_real_insn (found_barrier))
5294 found_barrier = good_barrier;
5298 /* We didn't find a barrier in time to dump our stuff,
5299 so we'll make one. */
5300 rtx_code_label *label = gen_label_rtx ();
5302 /* Don't emit a constant table in the middle of insns for
5303 casesi_worker_2. This is a bit overkill but is enough
5304 because casesi_worker_2 wouldn't appear so frequently. */
5308 /* If we exceeded the range, then we must back up over the last
5309 instruction we looked at. Otherwise, we just need to undo the
5310 NEXT_INSN at the end of the loop. */
5311 if (PREV_INSN (from) != orig
5312 && (count_hi > hi_limit || count_si > si_limit))
5313 from = PREV_INSN (PREV_INSN (from));
5315 from = PREV_INSN (from);
5317 /* Don't emit a constant table in the middle of global pointer setting,
5318 since that would move the addressing base GOT into another table.
5319 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5320 in the pool anyway, so just move up the whole constant pool.
5322 However, avoid doing so when the last single GOT mov is the starting
5323 insn itself. Going past above the start insn would create a negative
5324 offset, causing errors. */
5325 if (last_got && last_got != orig)
5326 from = PREV_INSN (last_got);
5328 /* Don't insert the constant pool table at the position which
5329 may be the landing pad. */
5332 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5333 from = PREV_INSN (from);
5335 /* Walk back to be just before any jump or label.
5336 Putting it before a label reduces the number of times the branch
5337 around the constant pool table will be hit. Putting it before
5338 a jump makes it more likely that the bra delay slot will be filled. */
5340 while (NOTE_P (from) || JUMP_P (from)
5342 from = PREV_INSN (from);
5344 /* Make sure we do not split between a call and its corresponding
5345 CALL_ARG_LOCATION note. */
5348 rtx_insn *next = NEXT_INSN (from);
5349 if (next && NOTE_P (next)
5350 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5354 from = emit_jump_insn_after (gen_jump (label), from);
5355 JUMP_LABEL (from) = label;
5356 LABEL_NUSES (label) = 1;
5357 found_barrier = emit_barrier_after (from);
5358 emit_label_after (label, found_barrier);
5361 return found_barrier;
5364 /* If the instruction INSN is implemented by a special function, and we can
5365 positively find the register that is used to call the sfunc, and this
5366 register is not used anywhere else in this instruction - except as the
5367 destination of a set, return this register; else, return 0. */
5369 sfunc_uses_reg (rtx insn)
5372 rtx pattern, part, reg_part, reg;
/* Only non-jump insns whose pattern is a PARALLEL and whose type attribute
   is TYPE_SFUNC are of interest.  */
5374 if (!NONJUMP_INSN_P (insn))
5376 pattern = PATTERN (insn);
5377 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
/* Scan elements from the last down to element 1, looking for a USE whose
   operand has SImode.  */
5380 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5382 part = XVECEXP (pattern, 0, i);
5383 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
/* REG is the operand of the USE that was found.  Now look at every element
   (including element 0); the USE itself and CLOBBERs are tested for first.  */
5388 reg = XEXP (reg_part, 0);
5389 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5391 part = XVECEXP (pattern, 0, i);
5392 if (part == reg_part || GET_CODE (part) == CLOBBER)
/* For a SET with a register destination only the source is searched for
   REG; any other element is searched as a whole.  */
5394 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5395 && REG_P (SET_DEST (part)))
5396 ? SET_SRC (part) : part)))
5402 /* See if the only way in which INSN uses REG is by calling it, or by
5403 setting it while calling it. Set *SET to a SET rtx if the register is set by INSN. */
5406 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
5412 reg2 = sfunc_uses_reg (insn);
5413 if (reg2 && REGNO (reg2) == REGNO (reg))
5415 pattern = single_set (insn);
5417 && REG_P (SET_DEST (pattern))
5418 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5424 /* We don't use rtx_equal_p because we don't care if the mode is different. */
5426 pattern = single_set (insn);
5428 && REG_P (SET_DEST (pattern))
5429 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5435 par = PATTERN (insn);
5436 if (GET_CODE (par) == PARALLEL)
5437 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5439 part = XVECEXP (par, 0, i);
5440 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5443 return reg_mentioned_p (reg, SET_SRC (pattern));
5449 pattern = PATTERN (insn);
5451 if (GET_CODE (pattern) == PARALLEL)
5455 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5456 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5458 pattern = XVECEXP (pattern, 0, 0);
5461 if (GET_CODE (pattern) == SET)
5463 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5465 /* We don't use rtx_equal_p, because we don't care if the
5466 mode is different. */
5467 if (!REG_P (SET_DEST (pattern))
5468 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5474 pattern = SET_SRC (pattern);
5477 if (GET_CODE (pattern) != CALL
5478 || !MEM_P (XEXP (pattern, 0))
5479 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5485 /* Given X, a pattern of an insn or a part of it, return a mask of used
5486 general registers. Bits 0..15 mean that the respective registers
5487 are used as inputs in the instruction. Bits 16..31 mean that the
5488 registers 0..15, respectively, are used as outputs, or are clobbered.
5489 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5491 regs_used (rtx x, int is_dest)
5499 code = GET_CODE (x);
/* Register case: one bit per hard register occupied by X's mode, starting
   at X's register number and shifted up by IS_DEST.  */
5504 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5505 << (REGNO (x) + is_dest));
/* Subreg case: same mask shape, with the start position adjusted by
   subreg_regno_offset.  */
5509 rtx y = SUBREG_REG (x);
5514 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5516 subreg_regno_offset (REGNO (y),
5519 GET_MODE (x)) + is_dest));
/* SET case: the source contributes input bits, the destination is scanned
   with IS_DEST = 16 so that it contributes output bits.  */
5523 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5525 /* If there was a return value, it must have been indicated with USE. */
/* Generic case: walk the operands according to the rtx format string and
   accumulate the masks of vector elements and 'e' sub-expressions.  */
5540 fmt = GET_RTX_FORMAT (code);
5542 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5547 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5548 used |= regs_used (XVECEXP (x, i, j), is_dest);
5550 else if (fmt[i] == 'e')
5551 used |= regs_used (XEXP (x, i), is_dest);
5556 /* Create an instruction that prevents redirection of a conditional branch
5557 to the destination of the JUMP with address ADDR.
5558 If the branch needs to be implemented as an indirect jump, try to find
5559 a scratch register for it.
5560 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5561 If any preceding insn that doesn't fit into a delay slot is good enough,
5562 pass 1. Pass 2 if a definite blocking insn is needed.
5563 -1 is used internally to avoid deep recursion.
5564 If a blocking instruction is made or recognized, return it. */
5566 gen_block_redirect (rtx jump, int addr, int need_block)
5569 rtx prev = prev_nonnote_insn (jump);
5572 /* First, check if we already have an instruction that satisfies our need. */
5573 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5575 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5577 if (GET_CODE (PATTERN (prev)) == USE
5578 || GET_CODE (PATTERN (prev)) == CLOBBER
5579 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5581 else if ((need_block &= ~1) < 0)
5583 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5586 if (GET_CODE (PATTERN (jump)) == RETURN)
5590 /* Reorg even does nasty things with return insns that cause branches
5591 to go out of range - see find_end_label and callers. */
5592 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5594 /* We can't use JUMP_LABEL here because it might be undefined
5595 when not optimizing. */
5596 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5597 /* If the branch is out of range, try to find a scratch register for it. */
5599 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5603 /* Don't look for the stack pointer as a scratch register,
5604 it would cause trouble if an interrupt occurred. */
5605 unsigned attempt = 0x7fff, used;
5606 int jump_left = flag_expensive_optimizations + 1;
5608 /* It is likely that the most recent eligible instruction is wanted for
5609 the delay slot. Therefore, find out which registers it uses, and
5610 try to avoid using them. */
5612 for (scan = jump; (scan = PREV_INSN (scan)); )
5616 if (INSN_DELETED_P (scan))
5618 code = GET_CODE (scan);
5619 if (code == CODE_LABEL || code == JUMP_INSN)
5622 && GET_CODE (PATTERN (scan)) != USE
5623 && GET_CODE (PATTERN (scan)) != CLOBBER
5624 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5626 attempt &= ~regs_used (PATTERN (scan), 0);
5630 for (used = dead = 0, scan = JUMP_LABEL (jump);
5631 (scan = NEXT_INSN (scan)); )
5635 if (INSN_DELETED_P (scan))
5637 code = GET_CODE (scan);
5640 used |= regs_used (PATTERN (scan), 0);
5641 if (code == CALL_INSN)
5642 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5643 dead |= (used >> 16) & ~used;
5649 if (code == JUMP_INSN)
5651 if (jump_left-- && simplejump_p (scan))
5652 scan = JUMP_LABEL (scan);
5658 /* Mask out the stack pointer again, in case it was
5659 the only 'free' register we have found. */
5662 /* If the immediate destination is still in range, check for possible
5663 threading with a jump beyond the delay slot insn.
5664 Don't check if we are called recursively; the jump has been or will be
5665 checked in a different invocation then. */
5667 else if (optimize && need_block >= 0)
5669 rtx next = next_active_insn (next_active_insn (dest));
5670 if (next && JUMP_P (next)
5671 && GET_CODE (PATTERN (next)) == SET
5672 && recog_memoized (next) == CODE_FOR_jump_compact)
5674 dest = JUMP_LABEL (next);
5676 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5678 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5684 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5686 /* It would be nice if we could convert the jump into an indirect
5687 jump / far branch right now, and thus exposing all constituent
5688 instructions to further optimization. However, reorg uses
5689 simplejump_p to determine if there is an unconditional jump where
5690 it should try to schedule instructions from the target of the
5691 branch; simplejump_p fails for indirect jumps even if they have a JUMP_LABEL. */
5693 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5694 (reg, GEN_INT (unspec_bbr_uid++)),
5696 /* ??? We would like this to have the scope of the jump, but that
5697 scope will change when a delay slot insn of an inner scope is added.
5698 Hence, after delay slot scheduling, we'll have to expect
5699 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and the jump. */
5702 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5703 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5706 else if (need_block)
5707 /* We can't use JUMP_LABEL here because it might be undefined
5708 when not optimizing. */
5709 return emit_insn_before (gen_block_branch_redirect
5710 (GEN_INT (unspec_bbr_uid++)),
/* NOTE(review): CONDJUMP_MIN / CONDJUMP_MAX look like the displacement range
   of a conditional branch; their users are outside this view - confirm.  */
5715 #define CONDJUMP_MIN -252
5716 #define CONDJUMP_MAX 262
/* Bookkeeping record for one far branch.  The field declarations that the
   comments below describe are not visible in this listing.  */
5719 /* A label (to be placed) in front of the jump
5720 that jumps to our ultimate destination. */
5722 /* Where we are going to insert it if we cannot move the jump any farther,
5723 or the jump itself if we have picked up an existing jump. */
5725 /* The ultimate destination. */
/* Link to another far_branch record.  */
5727 struct far_branch *prev;
5728 /* If the branch has already been created, its address;
5729 else the address of its first prospective user. */
5733 static void gen_far_branch (struct far_branch *);
/* Current phase of the machine-dependent reorg pass.  */
5734 enum mdep_reorg_phase_e mdep_reorg_phase;
/* Emit the far branch described by BP right after bp->insert_place.
   Every insn below is emitted directly after that same insn, so the final
   order is the reverse of the emission order: bp->near_label, the new jump
   (to bp->far_label) or return, an optional barrier, then a fresh label.
   The original jump at bp->insert_place is inverted to target the fresh
   label, so it branches around the newly emitted jump.
   NOTE(review): the conditions choosing jump vs. return and guarding the
   barrier are not visible in this listing.  */
5736 gen_far_branch (struct far_branch *bp)
5738 rtx insn = bp->insert_place;
5740 rtx label = gen_label_rtx ();
5743 emit_label_after (label, insn);
/* Jump to the far label, counting the new use of that label ...  */
5746 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5747 LABEL_NUSES (bp->far_label)++;
/* ... or emit a return instead.  */
5750 jump = emit_jump_insn_after (gen_return (), insn);
5752 /* Emit a barrier so that reorg knows that any following instructions
5753 are not reachable via a fall-through path.
5754 But don't do this when not optimizing, since we wouldn't suppress the
5755 alignment for the barrier then, and could end up with out-of-range
5756 pc-relative loads. */
5758 emit_barrier_after (jump);
5759 emit_label_after (bp->near_label, insn);
/* Record the jump's target: the far label, or the return pattern itself.  */
5762 JUMP_LABEL (jump) = bp->far_label;
5765 rtx pat = PATTERN (jump);
5766 gcc_assert (ANY_RETURN_P (pat));
5767 JUMP_LABEL (jump) = pat;
/* Invert the original jump so that it targets the fresh label.  */
5770 ok = invert_jump (insn, label, 1);
5773 /* If we are branching around a jump (rather than a return), prevent
5774 reorg from using an insn from the jump target as the delay slot insn -
5775 when reorg did this, it pessimized code (we rather hide the delay slot)
5776 and it could cause branches to go out of range. */
5779 (gen_stuff_delay_slot
5780 (GEN_INT (unspec_bbr_uid++),
5781 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5783 /* Prevent reorg from undoing our splits. */
/* Note that bp->address is advanced by 2 as a side effect of this call.  */
5784 gen_block_redirect (jump, bp->address += 2, 2);
5787 /* Fix up ADDR_DIFF_VECs.
   Walk the insn chain starting at FIRST.  For every jump-table insn whose
   pattern is an ADDR_DIFF_VEC, search backwards from the table's base label
   for the matching casesi_jump_2, emit the braf reference label right after
   it, and make the table relative to that label.  */
5789 fixup_addr_diff_vecs (rtx first)
5793 for (insn = first; insn; insn = NEXT_INSN (insn))
5795 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5797 if (! JUMP_TABLE_DATA_P (insn)
5798 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* VEC_LAB is the label the table is currently relative to.  */
5800 pat = PATTERN (insn);
5801 vec_lab = XEXP (XEXP (pat, 0), 0);
5803 /* Search the matching casesi_jump_2. */
5804 for (prev = vec_lab; ; prev = PREV_INSN (prev))
/* The insn sought is a two-element PARALLEL whose second element is a USE
   of a LABEL_REF to VEC_LAB.  */
5808 prevpat = PATTERN (prev);
5809 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5811 x = XVECEXP (prevpat, 0, 1);
5812 if (GET_CODE (x) != USE)
5815 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5818 /* FIXME: This is a bug in the optimizer, but it seems harmless
5819 to just avoid panicking. */
5823 /* Emit the reference label of the braf where it belongs, right after
5824 the casesi_jump_2 (i.e. braf). */
5825 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5826 emit_label_after (braf_label, prev);
5828 /* Fix up the ADDR_DIFF_VEC to be relative
5829 to the reference address of the braf. */
5830 XEXP (XEXP (pat, 0), 0) = braf_label;
5834 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5835 a barrier. Return the base 2 logarithm of the desired alignment. */
5837 barrier_align (rtx barrier_or_label)
5841 if (! barrier_or_label)
5844 if (LABEL_P (barrier_or_label)
5845 && NEXT_INSN (barrier_or_label)
5846 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5849 if (BARRIER_P (barrier_or_label)
5850 && PREV_INSN (barrier_or_label)
5851 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5853 pat = PATTERN (PREV_INSN (barrier_or_label));
5854 /* If this is a very small table, we want to keep the alignment after
5855 the table to the minimum for proper code alignment. */
5856 return ((optimize_size
5857 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5858 <= (unsigned) 1 << (CACHE_LOG - 2)))
5859 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5862 next = next_active_insn (barrier_or_label);
5867 pat = PATTERN (next);
5869 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5870 /* This is a barrier in front of a constant table. */
5876 if (! TARGET_SH2 || ! optimize)
5877 return align_jumps_log;
5879 /* When fixing up pcloads, a constant table might be inserted just before
5880 the basic block that ends with the barrier. Thus, we can't trust the
5881 instruction lengths before that. */
5882 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5884 /* Check if there is an immediately preceding branch to the insn beyond
5885 the barrier. We must weight the cost of discarding useful information
5886 from the current cache line when executing this branch and there is
5887 an alignment, against that of fetching unneeded insn in front of the
5888 branch target when there is no alignment. */
5890 /* There are two delay_slot cases to consider. One is the simple case
5891 where the preceding branch is to the insn beyond the barrier (simple
5892 delay slot filling), and the other is where the preceding branch has
5893 a delay slot that is a duplicate of the insn after the barrier
5894 (fill_eager_delay_slots) and the branch is to the insn after the insn
5895 after the barrier. */
5898 bool jump_to_next = false;
5900 /* Skip to the insn before the JUMP_INSN before the barrier under investigation. */
5902 rtx prev = prev_real_insn (prev_active_insn (barrier_or_label));
5904 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5905 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5906 prev = prev_real_insn (prev))
5908 jump_to_next = false;
5909 if (GET_CODE (PATTERN (prev)) == USE
5910 || GET_CODE (PATTERN (prev)) == CLOBBER)
5912 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5914 prev = XVECEXP (PATTERN (prev), 0, 1);
5915 if (INSN_UID (prev) == INSN_UID (next))
5917 /* Delay slot was filled with insn at jump target. */
5918 jump_to_next = true;
5924 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5926 credit -= get_attr_length (prev);
5928 if (prev && jump_to_label_p (prev))
5932 || next_real_insn (JUMP_LABEL (prev)) == next
5933 /* If relax_delay_slots() decides NEXT was redundant
5934 with some previous instruction, it will have
5935 redirected PREV's jump to the following insn. */
5936 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5937 /* There is no upper bound on redundant instructions
5938 that might have been skipped, but we must not put an
5939 alignment where none had been before. */
5940 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5942 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5943 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5944 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5946 rtx pat = PATTERN (prev);
5947 if (GET_CODE (pat) == PARALLEL)
5948 pat = XVECEXP (pat, 0, 0);
5949 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5955 return align_jumps_log;
5958 /* If we are inside a phony loop, almost any kind of label can turn up as the
5959 first one in the loop. Aligning a braf label causes incorrect switch
5960 destination addresses; we can detect braf labels because they are
5961 followed by a BARRIER.
5962 Applying loop alignment to small constant or switch tables is a waste
5963 of space, so we suppress this too. */
5965 sh_loop_align (rtx label)
/* The special handling applies only when optimizing, and not for size.  */
5969 if (! optimize || optimize_size)
/* Step over any further labels that follow.  */
5973 next = next_nonnote_insn (next);
5974 while (next && LABEL_P (next));
/* A constant-table entry (consttable_2) is one of the cases tested for
   here; the other alternatives of this condition are not visible in this
   listing.  */
5978 || recog_memoized (next) == CODE_FOR_consttable_2)
/* Default: the configured loop alignment.  */
5981 return align_loops_log;
5984 /* Do a final pass over the function, just before delayed branch scheduling. */
5989 rtx_insn *first, *insn, *mova = NULL;
5991 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5992 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5994 first = get_insns ();
5995 max_labelno_before_reorg = max_label_num ();
5997 /* We must split call insns before introducing `mova's. If we're
5998 optimizing, they'll have already been split. Otherwise, make
5999 sure we don't split them too late. */
6001 split_all_insns_noflow ();
6006 /* If relaxing, generate pseudo-ops to associate function calls with
6007 the symbols they call. It does no harm to not generate these
6008 pseudo-ops. However, when we can generate them, it enables the
6009 linker to potentially relax the jsr to a bsr, and eliminate the
6010 register load and, possibly, the constant pool entry. */
6012 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6015 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6016 own purposes. This works because none of the remaining passes
6017 need to look at them.
6019 ??? But it may break in the future. We should use a machine
6020 dependent REG_NOTE, or some other approach entirely. */
6021 for (insn = first; insn; insn = NEXT_INSN (insn))
6027 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6029 remove_note (insn, note);
6033 for (insn = first; insn; insn = NEXT_INSN (insn))
6035 rtx pattern, reg, set, dies, label;
6036 rtx_insn *link, *scan;
6037 int rescan = 0, foundinsn = 0;
6041 pattern = PATTERN (insn);
6043 if (GET_CODE (pattern) == PARALLEL)
6044 pattern = XVECEXP (pattern, 0, 0);
6045 if (GET_CODE (pattern) == SET)
6046 pattern = SET_SRC (pattern);
6048 if (GET_CODE (pattern) != CALL
6049 || !MEM_P (XEXP (pattern, 0)))
6052 reg = XEXP (XEXP (pattern, 0), 0);
6056 reg = sfunc_uses_reg (insn);
6064 /* Try scanning backward to find where the register is set. */
6066 for (scan = PREV_INSN (insn);
6067 scan && !LABEL_P (scan);
6068 scan = PREV_INSN (scan))
6070 if (! INSN_P (scan))
6073 if (! reg_mentioned_p (reg, scan))
6076 if (noncall_uses_reg (reg, scan, &set))
6089 /* The register is set at LINK. */
6091 /* We can only optimize the function call if the register is
6092 being set to a symbol. In theory, we could sometimes
6093 optimize calls to a constant location, but the assembler
6094 and linker do not support that at present. */
6095 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6096 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6099 /* Scan forward from LINK to the place where REG dies, and
6100 make sure that the only insns which use REG are
6101 themselves function calls. */
6103 /* ??? This doesn't work for call targets that were allocated
6104 by reload, since there may not be a REG_DEAD note for the
6108 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6112 /* Don't try to trace forward past a CODE_LABEL if we haven't
6113 seen INSN yet. Ordinarily, we will only find the setting insn
6114 if it is in the same basic block. However,
6115 cross-jumping can insert code labels in between the load and
6116 the call, and can result in situations where a single call
6117 insn may have two targets depending on where we came from. */
6119 if (LABEL_P (scan) && ! foundinsn)
6122 if (! INSN_P (scan))
6125 /* Don't try to trace forward past a JUMP. To optimize
6126 safely, we would have to check that all the
6127 instructions at the jump destination did not use REG. */
6132 if (! reg_mentioned_p (reg, scan))
6135 if (noncall_uses_reg (reg, scan, &scanset))
6142 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6144 /* There is a function call to this register other
6145 than the one we are checking. If we optimize
6146 this call, we need to rescan again below. */
6150 /* ??? We shouldn't have to worry about SCANSET here.
6151 We should just be able to check for a REG_DEAD note
6152 on a function call. However, the REG_DEAD notes are
6153 apparently not dependable around libcalls; c-torture
6154 execute/920501-2 is a test case. If SCANSET is set,
6155 then this insn sets the register, so it must have
6156 died earlier. Unfortunately, this will only handle
6157 the cases in which the register is, in fact, set in a
6160 /* ??? We shouldn't have to use FOUNDINSN here.
6161 This dates back to when we used LOG_LINKS to find
6162 the most recent insn which sets the register. */
6166 || find_reg_note (scan, REG_DEAD, reg)))
6175 /* Either there was a branch, or some insn used REG
6176 other than as a function call address. */
6180 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6181 on the insn which sets the register, and on each call insn
6182 which uses the register. In final_prescan_insn we look for
6183 the REG_LABEL_OPERAND notes, and output the appropriate label
6186 label = gen_label_rtx ();
6187 add_reg_note (link, REG_LABEL_OPERAND, label);
6188 add_reg_note (insn, REG_LABEL_OPERAND, label);
6196 scan = NEXT_INSN (scan);
6199 && reg_mentioned_p (reg, scan))
6200 || ((reg2 = sfunc_uses_reg (scan))
6201 && REGNO (reg2) == REGNO (reg))))
6202 add_reg_note (scan, REG_LABEL_OPERAND, label);
6204 while (scan != dies);
6210 fixup_addr_diff_vecs (first);
6214 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6215 shorten_branches (first);
6218 /* Scan the function looking for move instructions which have to be
6219 changed to pc-relative loads and insert the literal tables. */
6220 label_ref_list_pool = create_alloc_pool ("label references list",
6221 sizeof (struct label_ref_list_d),
6223 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6224 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6228 /* ??? basic block reordering can move a switch table dispatch
6229 below the switch table. Check if that has happened.
6230 We only have the addresses available when optimizing; but then,
6231 this check shouldn't be needed when not optimizing. */
6232 if (!untangle_mova (&num_mova, &mova, insn))
6238 else if (JUMP_TABLE_DATA_P (insn)
6239 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6241 /* ??? loop invariant motion can also move a mova out of a
6242 loop. Since loop does this code motion anyway, maybe we
6243 should wrap UNSPEC_MOVA into a CONST, so that reload can
6246 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6247 || (prev_nonnote_insn (insn)
6248 == XEXP (MOVA_LABELREF (mova), 0))))
6255 /* Some code might have been inserted between the mova and
6256 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6257 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6258 total += get_attr_length (scan);
6260 /* range of mova is 1020, add 4 because pc counts from address of
6261 second instruction after this one, subtract 2 in case pc is 2
6262 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6263 cancels out with alignment effects of the mova itself. */
6266 /* Change the mova into a load, and restart scanning
6267 there. broken_move will then return true for mova. */
6272 if (broken_move (insn)
6273 || (NONJUMP_INSN_P (insn)
6274 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6277 /* Scan ahead looking for a barrier to stick the constant table
6279 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6280 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
6281 int need_aligned_label = 0;
6283 if (num_mova && ! mova_p (mova))
6285 /* find_barrier had to change the first mova into a
6286 pcload; thus, we have to start with this new pcload. */
6290 /* Now find all the moves between the points and modify them. */
6291 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6295 if (NONJUMP_INSN_P (scan)
6296 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6297 need_aligned_label = 1;
6298 if (broken_move (scan))
6300 rtx *patp = &PATTERN (scan), pat = *patp;
6304 enum machine_mode mode;
6306 if (GET_CODE (pat) == PARALLEL)
6307 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6308 src = SET_SRC (pat);
6309 dst = SET_DEST (pat);
6310 mode = GET_MODE (dst);
6312 if (mode == SImode && satisfies_constraint_I16 (src)
6313 && REGNO (dst) != FPUL_REG)
6318 while (GET_CODE (dst) == SUBREG)
6320 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6321 GET_MODE (SUBREG_REG (dst)),
6324 dst = SUBREG_REG (dst);
6326 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6328 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6330 /* This must be an insn that clobbers r0. */
6331 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6332 XVECLEN (PATTERN (scan), 0)
6334 rtx clobber = *clobberp;
6336 gcc_assert (GET_CODE (clobber) == CLOBBER
6337 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6340 && reg_set_between_p (r0_rtx, last_float_move, scan))
6344 && GET_MODE_SIZE (mode) != 4
6345 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6347 lab = add_constant (src, mode, last_float);
6349 emit_insn_before (gen_mova (lab), scan);
6352 /* There will be a REG_UNUSED note for r0 on
6353 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6354 lest reorg:mark_target_live_regs will not
6355 consider r0 to be used, and we end up with delay
6356 slot insn in front of SCAN that clobbers r0. */
6358 = find_regno_note (last_float_move, REG_UNUSED, 0);
6360 /* If we are not optimizing, then there may not be
6363 PUT_REG_NOTE_KIND (note, REG_INC);
6365 *last_float_addr = r0_inc_rtx;
6367 last_float_move = scan;
6369 newsrc = gen_const_mem (mode,
6370 (((TARGET_SH4 && ! TARGET_FMOVD)
6371 || REGNO (dst) == FPUL_REG)
6374 last_float_addr = &XEXP (newsrc, 0);
6376 /* Remove the clobber of r0. */
6377 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6378 gen_rtx_SCRATCH (Pmode));
6380 /* This is a mova needing a label. Create it. */
6381 else if (GET_CODE (src) == UNSPEC
6382 && XINT (src, 1) == UNSPEC_MOVA
6383 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6385 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6386 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6387 newsrc = gen_rtx_UNSPEC (SImode,
6388 gen_rtvec (1, newsrc),
6391 else if (GET_CODE (src) == UNSPEC_VOLATILE
6392 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6394 newsrc = XVECEXP (src, 0, 0);
6395 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6396 INSN_CODE (scan) = -1;
6401 lab = add_constant (src, mode, 0);
6402 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6403 newsrc = gen_const_mem (mode, newsrc);
6405 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6406 INSN_CODE (scan) = -1;
6409 dump_table (need_aligned_label ? insn : 0, barrier);
6413 free_alloc_pool (label_ref_list_pool);
6414 for (insn = first; insn; insn = NEXT_INSN (insn))
6415 PUT_MODE (insn, VOIDmode);
6417 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6418 INSN_ADDRESSES_FREE ();
6419 split_branches (first);
6421 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6422 also has an effect on the register that holds the address of the sfunc.
6423 Insert an extra dummy insn in front of each sfunc that pretends to
6424 use this register. */
6425 if (flag_delayed_branch)
6427 for (insn = first; insn; insn = NEXT_INSN (insn))
6429 rtx reg = sfunc_uses_reg (insn);
6433 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6437 /* fpscr is not actually a user variable, but we pretend it is for the
6438 sake of the previous optimization passes, since we want it handled like
6439 one. However, we don't have any debugging information for it, so turn
6440 it into a non-user variable now. */
6442 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6444 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6447 /* Return the UID of the insn that follows the specified label. */
/* LABEL is the label to resolve.  MAX_UID is an exclusive upper bound:
   the loop below keeps advancing while the UID is not below it.
   NOTE(review): the embedded line numbers skip, so the return type, the
   braces, the declaration of DEST_UID and the return statements are
   missing from this copy.  */
6449 get_dest_uid (rtx label, int max_uid)
/* Start from the first real insn after LABEL.  */
6451 rtx dest = next_real_insn (label);
6454 /* This can happen for an undefined label. */
6456 dest_uid = INSN_UID (dest);
6457 /* If this is a newly created branch redirection blocking instruction,
6458 we cannot index the branch_uid or insn_addresses arrays with its
6459 uid. But then, we won't need to, because the actual destination is
6460 the following branch. */
/* Step to the following insn until its UID is below MAX_UID.  */
6461 while (dest_uid >= max_uid)
6463 dest = NEXT_INSN (dest);
6464 dest_uid = INSN_UID (dest);
/* Special case: the destination is a jump whose pattern is a bare RETURN;
   the value produced for it is not visible in this copy.  */
6466 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6471 /* Split condbranches that are out of range. Also add clobbers for
6472 scratch registers that are needed in far jumps.
6473 We do this before delay slot scheduling, so that it can take our
6474 newly created instructions into account. It also allows us to
6475 find branches with common targets more easily. */
/* FIRST is the first insn of the chain to process.
   NOTE(review): the embedded line numbers skip, so the return type, the
   braces, some declarations and many statements of this function are
   missing from this copy; the notes added below describe only the lines
   that remain.  */
6477 split_branches (rtx_insn *first)
/* UID_BRANCH maps a destination UID to its far_branch record;
   FAR_BRANCH_LIST chains all records through their prev fields.  */
6480 struct far_branch **uid_branch, *far_branch_list = 0;
6481 int max_uid = get_max_uid ();
6484 /* Find out which branches are out of range. */
6485 shorten_branches (first);
/* The table is allocated with alloca and starts out all-zero.  */
6487 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6488 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
/* Walk all insns, dispatching on their kind.  */
6490 for (insn = first; insn; insn = NEXT_INSN (insn))
6491 if (! INSN_P (insn))
6493 else if (INSN_DELETED_P (insn))
6495 /* Shorten_branches would split this instruction again,
6496 so transform it into a note. */
6497 SET_INSN_DELETED (insn);
6499 else if (JUMP_P (insn))
6501 enum attr_type type = get_attr_type (insn);
/* Conditional branches.  */
6502 if (type == TYPE_CBRANCH)
/* Length above 4: presumably the out-of-range case that the header
   comment refers to.  */
6506 if (get_attr_length (insn) > 4)
/* OLABEL is operand 0 of operand 1 of the branch's source expression.  */
6508 rtx src = SET_SRC (PATTERN (insn));
6509 rtx olabel = XEXP (XEXP (src, 1), 0);
6510 int addr = INSN_ADDRESSES (INSN_UID (insn));
6512 int dest_uid = get_dest_uid (olabel, max_uid);
6513 struct far_branch *bp = uid_branch[dest_uid];
6515 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6516 the label if the LABEL_NUSES count drops to zero. There is
6517 always a jump_optimize pass that sets these values, but it
6518 proceeds to delete unreferenced code, and then if not
6519 optimizing, to un-delete the deleted instructions, thus
6520 leaving labels with too low uses counts. */
6523 JUMP_LABEL (insn) = olabel;
6524 LABEL_NUSES (olabel)++;
/* Allocate a new record with alloca, enter it in the table and push it on
   the list (presumably only when BP was null; that test is not visible).  */
6528 bp = (struct far_branch *) alloca (sizeof *bp);
6529 uid_branch[dest_uid] = bp;
6530 bp->prev = far_branch_list;
6531 far_branch_list = bp;
6533 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6534 LABEL_NUSES (bp->far_label)++;
6538 label = bp->near_label;
6539 if (! label && bp->address - addr >= CONDJUMP_MIN)
6541 rtx block = bp->insert_place;
6543 if (GET_CODE (PATTERN (block)) == RETURN)
6544 block = PREV_INSN (block);
6546 block = gen_block_redirect (block,
6548 label = emit_label_after (gen_label_rtx (),
6550 bp->near_label = label;
6552 else if (label && ! NEXT_INSN (label))
6554 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6555 bp->insert_place = insn;
6557 gen_far_branch (bp);
6561 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6563 bp->near_label = label = gen_label_rtx ();
6564 bp->insert_place = insn;
/* Point the branch at LABEL; OK receives redirect_jump's result.  */
6567 ok = redirect_jump (insn, label, 0);
6572 /* get_attr_length (insn) == 2 */
6573 /* Check if we have a pattern where reorg wants to redirect
6574 the branch to a label from an unconditional branch that
6576 /* We can't use JUMP_LABEL here because it might be undefined
6577 when not optimizing. */
6578 /* A syntax error might cause beyond to be NULL_RTX. */
6580 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6585 || ((beyond = next_active_insn (beyond))
6586 && JUMP_P (beyond)))
6587 && GET_CODE (PATTERN (beyond)) == SET
6588 && recog_memoized (beyond) == CODE_FOR_jump_compact
6590 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6591 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6593 gen_block_redirect (beyond,
6594 INSN_ADDRESSES (INSN_UID (beyond)), 1);
/* NEXT is the next active insn after the branch itself.  */
6597 next = next_active_insn (insn);
6601 || ((next = next_active_insn (next))
6603 && GET_CODE (PATTERN (next)) == SET
6604 && recog_memoized (next) == CODE_FOR_jump_compact
6606 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6607 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6609 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
/* Insns of type jump or return.  */
6611 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6613 int addr = INSN_ADDRESSES (INSN_UID (insn));
6616 struct far_branch *bp;
6618 if (type == TYPE_JUMP)
6620 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6621 dest_uid = get_dest_uid (far_label, max_uid);
6624 /* Parse errors can lead to labels outside
6626 if (! NEXT_INSN (far_label))
6631 JUMP_LABEL (insn) = far_label;
6632 LABEL_NUSES (far_label)++;
6634 redirect_jump (insn, ret_rtx, 1);
6638 bp = uid_branch[dest_uid];
6641 bp = (struct far_branch *) alloca (sizeof *bp);
6642 uid_branch[dest_uid] = bp;
6643 bp->prev = far_branch_list;
6644 far_branch_list = bp;
6646 bp->far_label = far_label;
6648 LABEL_NUSES (far_label)++;
6650 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6651 if (addr - bp->address <= CONDJUMP_MAX)
6652 emit_label_after (bp->near_label, PREV_INSN (insn));
6655 gen_far_branch (bp);
6661 bp->insert_place = insn;
6663 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6665 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6668 /* Generate all pending far branches,
6669 and free our references to the far labels. */
/* Records are taken off the list one at a time by following prev.  */
6670 while (far_branch_list)
6672 if (far_branch_list->near_label
6673 && ! NEXT_INSN (far_branch_list->near_label))
6674 gen_far_branch (far_branch_list);
6676 && far_branch_list->far_label
6677 && ! --LABEL_NUSES (far_branch_list->far_label))
6678 delete_insn (far_branch_list->far_label);
6679 far_branch_list = far_branch_list->prev;
6682 /* Instruction length information is no longer valid due to the new
6683 instructions that have been generated. */
6684 init_insn_lengths ();
6687 /* Dump out instruction addresses, which is useful for debugging the
6688 constant pool table stuff.
6690 If relaxing, output the label and pseudo-ops used to link together
6691 calls and the instruction which set the registers.
6693 ??? The addresses printed by this routine for insns are nonsense for
6694 insns which are inside of a sequence where none of the inner insns have
6695 variable length. This is because the second pass of shorten_branches
6696 does not bother to update them. */
/* INSN is the insn being examined; OPVEC and NOPERANDS are marked unused.
   NOTE(review): the embedded line numbers skip, so the return type, the
   braces, the declarations of NOTE and PATTERN, the case labels of the
   switch and other statements are missing from this copy.  */
6698 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6699 int noperands ATTRIBUTE_UNUSED)
/* With TARGET_DUMPISIZE, print the insn's recorded address in hex to the
   assembly output.  */
6701 if (TARGET_DUMPISIZE)
6702 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
/* Look for a REG_LABEL_OPERAND note on INSN.  */
6708 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
/* Dispatch on the insn's pattern, using element 0 of a PARALLEL.  */
6713 pattern = PATTERN (insn);
6714 if (GET_CODE (pattern) == PARALLEL)
6715 pattern = XVECEXP (pattern, 0, 0);
6716 switch (GET_CODE (pattern))
/* A SET whose source is not a CALL, on an insn that is not of type sfunc,
   gets the note's label emitted as an internal label.  */
6719 if (GET_CODE (SET_SRC (pattern)) != CALL
6720 && get_attr_type (insn) != TYPE_SFUNC)
6722 targetm.asm_out.internal_label
6723 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6726 /* else FALLTHROUGH */
/* Emit a .uses directive that names the note's label.  */
6728 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6729 CODE_LABEL_NUMBER (XEXP (note, 0)));
/* output_jump_label_table: the visible lines emit a ".align 2" directive
   and then, for each of the pool_size entries of pool_vector, an internal
   label for the entry's label followed by a ".long" of the entry's value.
   NOTE(review): the embedded line numbers skip; the comment that follows
   is cut off before its closing delimiter, and the return type, the braces
   and any guarding condition of this function are missing from this
   copy.  */
6739 /* Dump out any constants accumulated in the final pass. These will
6742 output_jump_label_table (void)
6748 fprintf (asm_out_file, "\t.align 2\n");
6749 for (i = 0; i < pool_size; i++)
6751 pool_node *p = &pool_vector[i];
6753 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6754 CODE_LABEL_NUMBER (p->label));
6755 output_asm_insn (".long %O0", &p->value);
6763 /* A full frame looks like:
6767 [ if current_function_anonymous_args
6780 local-0 <- fp points here.
6782 Number of bytes pushed for anonymous args, used to pass information
6783 between expand_prologue and expand_epilogue.
6785 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6786 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6787 for an epilogue and a negative value means that it's for a sibcall
6788 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6789 all the registers that are about to be restored, and hence dead. */
/* FRAME_P selects frame_insn instead of emit_insn as the emitter for the
   adjusting insns (see EMIT_FN below).
   NOTE(review): the embedded line numbers skip, so the return type, the
   braces, several declarations and a number of statements are missing
   from this copy; some comments below are cut off before their closing
   delimiter.  */
6791 output_stack_adjust (int size, rtx reg, int epilogue_p,
6792 HARD_REG_SET *live_regs_mask, bool frame_p)
6794 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
/* ALIGN is the stack boundary converted from bits to storage units.  */
6797 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6799 /* This test is bogus, as output_stack_adjust is used to re-align the
6802 gcc_assert (!(size % align));
6805 if (CONST_OK_FOR_ADD (size))
6806 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6807 /* Try to do it with two partial adjustments; however, we must make
6808 sure that the stack is properly aligned at all times, in case
6809 an interrupt occurs between the two partial adjustments. */
6810 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6811 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
/* First step: SIZE / 2 masked to a multiple of ALIGN; second step: what is
   left of SIZE after the first step.  */
6813 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6814 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
/* Default scratch register number: 7 for an epilogue, otherwise 0 on SH5
   and 1 elsewhere.  */
6820 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6823 /* If TEMP is invalid, we could temporarily save a general
6824 register to MACL. However, there is currently no need
6825 to handle this case, so just die when we see it. */
6827 || current_function_interrupt
6828 || ! call_really_used_regs[temp] || fixed_regs[temp])
6830 if (temp < 0 && ! current_function_interrupt
6831 && (TARGET_SHMEDIA || epilogue_p >= 0))
/* Candidate temporaries: the call-used registers minus the call-fixed
   ones, narrowed further below.  */
6834 COPY_HARD_REG_SET (temps, call_used_reg_set);
6835 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
/* When there is a return rtx, drop NREG registers starting at
   FIRST_RET_REG from the candidates.  */
6839 if (crtl->return_rtx)
6841 enum machine_mode mode;
6842 mode = GET_MODE (crtl->return_rtx);
6843 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6844 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6846 for (i = 0; i < nreg; i++)
6847 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
/* With EH return, also drop the stack-adjust register and the four EH
   data registers.  */
6848 if (crtl->calls_eh_return)
6850 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6851 for (i = 0; i <= 3; i++)
6852 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6855 if (TARGET_SHMEDIA && epilogue_p < 0)
6856 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6857 CLEAR_HARD_REG_BIT (temps, i);
/* epilogue_p <= 0: drop the parameter registers, and the static chain
   register when the function has a static chain decl.  */
6858 if (epilogue_p <= 0)
6860 for (i = FIRST_PARM_REG;
6861 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6862 CLEAR_HARD_REG_BIT (temps, i);
6863 if (cfun->static_chain_decl != NULL)
6864 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6866 temp = scavenge_reg (&temps);
/* Fallback: scavenge among the registers in *LIVE_REGS_MASK, excluding
   REG itself.  */
6868 if (temp < 0 && live_regs_mask)
6872 COPY_HARD_REG_SET (temps, *live_regs_mask);
6873 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6874 temp = scavenge_reg (&temps);
6878 rtx adj_reg, tmp_reg, mem;
6880 /* If we reached here, the most likely case is the (sibcall)
6881 epilogue for non SHmedia. Put a special push/pop sequence
6882 for such case as the last resort. This looks lengthy but
6883 would not be problem because it seems to be very
6886 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6889 /* ??? There is still the slight possibility that r4 or
6890 r5 have been reserved as fixed registers or assigned
6891 as global registers, and they change during an
6892 interrupt. There are possible ways to handle this:
6894 - If we are adjusting the frame pointer (r14), we can do
6895 with a single temp register and an ordinary push / pop
6897 - Grab any call-used or call-saved registers (i.e. not
6898 fixed or globals) for the temps we need. We might
6899 also grab r14 if we are adjusting the stack pointer.
6900 If we can't find enough available registers, issue
6901 a diagnostic and die - the user must have reserved
6902 way too many registers.
6903 But since all this is rather unlikely to happen and
6904 would require extra testing, we just die if r4 / r5
6905 are not available. */
6906 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6907 && !global_regs[4] && !global_regs[5]);
/* Last-resort sequence: hard register 4 serves as ADJ_REG and hard
   register 5 as TMP_REG, both in REG's mode.  */
6909 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6910 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6911 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6912 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6913 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6914 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6915 emit_move_insn (mem, tmp_reg);
6916 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6917 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6918 emit_move_insn (mem, tmp_reg);
6919 emit_move_insn (reg, adj_reg);
6920 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6921 emit_move_insn (adj_reg, mem);
6922 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6923 emit_move_insn (tmp_reg, mem);
6924 /* Tell flow the insns that pop r4/r5 aren't dead. */
/* CONST_REG is hard register TEMP in REG's mode; it holds the constant
   for the add or subtract emitted below.  */
6929 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6931 /* If SIZE is negative, subtract the positive value.
6932 This sometimes allows a constant pool entry to be shared
6933 between prologue and epilogue code. */
6936 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6937 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6941 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6942 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
/* Attach a REG_FRAME_RELATED_EXPR note that sets REG to REG plus an
   operand that is cut off in this copy.  */
6944 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6945 gen_rtx_SET (VOIDmode, reg,
6946 gen_rtx_PLUS (SImode, reg,
6952 /* Emit the specified insn and mark it as frame related.
6953 FIXME: Rename this to emit_frame_insn. */
/* X is emitted with emit_insn and the resulting insn gets
   RTX_FRAME_RELATED_P set.
   NOTE(review): the embedded line numbers skip; the function's name line,
   its braces and its return statement are missing from this copy.  */
6957 rtx_insn *insn = emit_insn (x);
6958 RTX_FRAME_RELATED_P (insn) = 1;
6962 /* Output RTL to push register RN onto the stack. */
/* The generator used for X depends on which register RN is.
   NOTE(review): the embedded line numbers skip; the function's name line,
   its braces, the first condition of the if/else chain and the statement
   that emits X are missing from this copy.  */
6968 x = gen_push_fpul ();
6969 else if (rn == FPSCR_REG)
6970 x = gen_push_fpscr ();
/* SH4 or SH2A-double with FMOVD and not in single-precision mode: an FP or
   XD register is pushed as DFmode.  */
6971 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6972 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
/* Odd-numbered FP register relative to FIRST_FP_REG; the action taken for
   it is not visible here.  */
6974 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6976 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6978 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6979 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6981 x = gen_push (gen_rtx_REG (SImode, rn));
/* Attach a REG_INC note for the stack pointer to X.  */
6984 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6988 /* Output RTL to pop register RN from the stack. */
/* The generator used for X depends on which register RN is.
   NOTE(review): the embedded line numbers skip; the function's name line,
   its braces, the first condition of the if/else chain and the statement
   that emits X are missing from this copy.  */
6994 x = gen_pop_fpul ();
6995 else if (rn == FPSCR_REG)
6996 x = gen_pop_fpscr ();
/* SH4 or SH2A-double with FMOVD and not in single-precision mode: an FP or
   XD register is popped as DFmode.  */
6997 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6998 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
/* Odd-numbered FP register relative to FIRST_FP_REG; the action taken for
   it is not visible here.  */
7000 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7002 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7004 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7005 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7007 x = gen_pop (gen_rtx_REG (SImode, rn));
/* Notes attached to X: REG_CFA_RESTORE for the popped register (the
   destination of X's SET, or of the first SET of a PARALLEL),
   REG_CFA_ADJUST_CFA describing SP = SP + size of that register's mode,
   and REG_INC for the stack pointer.  X is then flagged frame related.  */
7011 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7012 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7013 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7014 : SET_DEST (PATTERN (x)));
7015 add_reg_note (x, REG_CFA_RESTORE, reg);
7016 add_reg_note (x, REG_CFA_ADJUST_CFA,
7017 gen_rtx_SET (SImode, sp_reg,
7018 plus_constant (SImode, sp_reg,
7019 GET_MODE_SIZE (GET_MODE (reg)))));
7020 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7021 RTX_FRAME_RELATED_P (x) = 1;
7024 /* Generate code to push the regs specified in the mask. */
/* MASK is the set of hard registers to push; INTERRUPT_HANDLER is nonzero
   when the function is an interrupt handler.
   NOTE(review): the embedded line numbers skip, so the return type, the
   braces, some declarations and the statements that actually push a
   register are missing from this copy.  */
7026 push_regs (HARD_REG_SET *mask, int interrupt_handler)
/* In an interrupt handler the first loop starts just above the banked
   registers; those are handled separately further down.  */
7028 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7031 /* Push PR last; this gives better latencies after the prologue, and
7032 candidates for the return delay slot when there are no general
7033 registers pushed. */
7034 for (; i < FIRST_PSEUDO_REGISTER; i++)
7036 /* If this is an interrupt handler, and the SZ bit varies,
7037 and we have to push any floating point register, we need
7038 to switch to the correct precision first. */
7039 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7040 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
/* UNSAVED is the complement of MASK and is handed to fpscr_set_from_mem.  */
7042 HARD_REG_SET unsaved;
7045 COMPL_HARD_REG_SET (unsaved, *mask);
7046 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7050 && (i != FPSCR_REG || ! skip_fpscr)
7051 && TEST_HARD_REG_BIT (*mask, i))
7053 /* If the ISR has RESBANK attribute assigned, don't push any of
7054 the following registers - R0-R14, MACH, MACL and GBR. */
/* The general-register range test uses a strict "<" against
   LAST_GENERAL_REG, so the last general register is not in the range.  */
7055 if (! (sh_cfun_resbank_handler_p ()
7056 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7064 /* Push banked registers last to improve delay slot opportunities. */
7065 if (interrupt_handler)
7067 bool use_movml = false;
/* COUNT is compared below against the number of banked registers; the
   statement that increments it is not visible here.  */
7071 unsigned int count = 0;
7073 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7074 if (TEST_HARD_REG_BIT (*mask, i))
7079 /* Use movml when all banked registers are pushed. */
7080 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7084 if (sh_cfun_resbank_handler_p ())
7088 rtx x, mem, reg, set;
7089 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7091 /* We must avoid scheduling multiple store insn with another
7093 emit_insn (gen_blockage ());
7094 x = gen_movml_push_banked (sp_reg);
7096 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7098 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7099 reg = gen_rtx_REG (SImode, i);
7100 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7103 set = gen_rtx_SET (SImode, sp_reg,
7104 plus_constant (Pmode, sp_reg, - 32));
7105 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7106 emit_insn (gen_blockage ());
7109 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7110 if (TEST_HARD_REG_BIT (*mask, i))
7114 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7115 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7119 /* Calculate how much extra space is needed to save all callee-saved
7121 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
/* NOTE(review): the embedded line numbers skip; a line of the comment
   above, the return type, the braces, the declaration of REG and the
   return statement (presumably returning STACK_SPACE) are missing from
   this copy.  */
7123 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7126 int stack_space = 0;
7127 int interrupt_handler = sh_cfun_interrupt_handler_p ();
/* Walk the target registers from last to first and add the natural-mode
   size of each one that is not in *LIVE_REGS_MASK and that is either not
   call_really_used or belongs to an interrupt handler.  */
7129 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7130 if ((! call_really_used_regs[reg] || interrupt_handler)
7131 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7132 /* Leave space to save this target register on the stack,
7133 in case target register allocation wants to use it. */
7134 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7138 /* Decide whether we should reserve space for callee-save target registers,
7139 in case target register allocation wants to use them. REGS_SAVED is
7140 the space, in bytes, that is already required for register saves.
7141 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
/* NOTE(review): the embedded line numbers skip; the return type, the
   braces and any earlier return paths are missing from this copy.  */
7143 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7144 HARD_REG_SET *live_regs_mask)
/* Visible path: true when the extra space computed by
   shmedia_target_regs_stack_space does not exceed REGS_SAVED.  */
7148 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7151 /* Decide how much space to reserve for callee-save target registers
7152 in case target register allocation wants to use them.
7153 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
/* NOTE(review): the embedded line numbers skip; the return type, the
   braces and the other branch of the test below are missing from this
   copy.  */
7155 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
/* When shmedia_space_reserved_for_target_registers is set, the result is
   the full space computed by shmedia_target_regs_stack_space.  */
7157 if (shmedia_space_reserved_for_target_registers)
7158 return shmedia_target_regs_stack_space (live_regs_mask);
7163 /* Work out the registers which need to be saved, both as a mask and a
7164 count of saved words. Return the count.
7166 If doing a pragma interrupt function, then push all regs used by the
7167 function, and if we call another function (we can tell by looking at PR),
7168 make sure that all the regs it clobbers are safe too. */
/* *LIVE_REGS_MASK is cleared and then filled in by this function.
   NOTE(review): the embedded line numbers skip, so the return type, the
   braces, some declarations, parts of the large condition in the register
   loop and the return statement are missing from this copy.  */
7170 calc_live_regs (HARD_REG_SET *live_regs_mask)
7175 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7176 bool nosave_low_regs;
7177 int pr_live, has_call;
/* Classify the function from its attributes.  INTERRUPT_HANDLER is true
   only when sh_cfun_interrupt_handler_p holds and the "trapa_handler"
   attribute is absent.  */
7179 attrs = DECL_ATTRIBUTES (current_function_decl);
7180 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7181 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7182 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7183 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
/* Start from an empty mask.  */
7185 CLEAR_HARD_REG_SET (*live_regs_mask);
7186 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7187 && df_regs_ever_live_p (FPSCR_REG))
7188 target_flags &= ~MASK_FPU_SINGLE;
7189 /* If we can save a lot of saves by switching to double mode, do that. */
7190 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7191 && TARGET_FPU_SINGLE)
/* The FP registers are examined in pairs (REG and REG + 1).  */
7192 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7193 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7194 && (! call_really_used_regs[reg]
7195 || interrupt_handler)
7198 target_flags &= ~MASK_FPU_SINGLE;
7201 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7202 knows how to use it. That means the pseudo originally allocated for
7203 the initial value can become the PR_MEDIA_REG hard register, as seen for
7204 execute/20010122-1.c:test9. */
7206 /* ??? this function is called from initial_elimination_offset, hence we
7207 can't use the result of sh_media_register_for_return here. */
/* PR_LIVE starts out as the value returned by sh_pr_n_sets.  */
7208 pr_live = sh_pr_n_sets ();
7211 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7212 pr_live = (pr_initial
7213 ? (!REG_P (pr_initial)
7214 || REGNO (pr_initial) != (PR_REG))
7215 : df_regs_ever_live_p (PR_REG));
7216 /* For Shcompact, if not optimizing, we end up with a memory reference
7217 using the return address pointer for __builtin_return_address even
7218 though there is no actual need to put the PR register on the stack. */
7219 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7221 /* Force PR to be live if the prologue has to call the SHmedia
7222 argument decoder or register saver. */
7223 if (TARGET_SHCOMPACT
7224 && ((crtl->args.info.call_cookie
7225 & ~ CALL_COOKIE_RET_TRAMP (1))
7226 || crtl->saves_all_registers))
/* HAS_CALL: on SHmedia, whether the function is not a leaf; otherwise the
   same value as PR_LIVE.  */
7228 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
/* Visit every hard register, from the highest number down to 0.  */
7229 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7231 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7234 ? (/* Need to save all the regs ever live. */
7235 (df_regs_ever_live_p (reg)
7236 || (call_really_used_regs[reg]
7237 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7238 || reg == PIC_OFFSET_TABLE_REGNUM)
7240 || (TARGET_SHMEDIA && has_call
7241 && REGISTER_NATURAL_MODE (reg) == SImode
7242 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7243 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7244 && reg != RETURN_ADDRESS_POINTER_REGNUM
7245 && reg != T_REG && reg != GBR_REG
7246 /* Push fpscr only on targets which have FPU */
7247 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7248 : (/* Only push those regs which are used and need to be saved. */
7251 && crtl->args.info.call_cookie
7252 && reg == PIC_OFFSET_TABLE_REGNUM)
7253 || (df_regs_ever_live_p (reg)
7254 && ((!call_really_used_regs[reg]
7255 && !(reg != PIC_OFFSET_TABLE_REGNUM
7256 && fixed_regs[reg] && call_used_regs[reg]))
7257 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7258 || (crtl->calls_eh_return
7259 && (reg == EH_RETURN_DATA_REGNO (0)
7260 || reg == EH_RETURN_DATA_REGNO (1)
7261 || reg == EH_RETURN_DATA_REGNO (2)
7262 || reg == EH_RETURN_DATA_REGNO (3)))
7263 || ((reg == MACL_REG || reg == MACH_REG)
7264 && df_regs_ever_live_p (reg)
7265 && sh_cfun_attr_renesas_p ())
/* REG is to be saved: add it to the mask and add its natural-mode size to
   COUNT.  */
7268 SET_HARD_REG_BIT (*live_regs_mask, reg);
7269 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7271 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7272 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7274 if (FP_REGISTER_P (reg))
/* Outside single-precision mode, the partner register (REG ^ 1) is saved
   as well when it was not otherwise live.  */
7276 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7278 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7279 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7282 else if (XD_REGISTER_P (reg))
7284 /* Must switch to double mode to access these registers. */
7285 target_flags &= ~MASK_FPU_SINGLE;
/* With the nosave_low_regs attribute, something happens once REG reaches
   R8_REG; the action itself is not visible in this copy.  */
7289 if (nosave_low_regs && reg == R8_REG)
7292 /* If we have a target register optimization pass after prologue / epilogue
7293 threading, we need to assume all target registers will be live even if
7295 if (flag_branch_target_load_optimize2
7296 && TARGET_SAVE_ALL_TARGET_REGS
7297 && shmedia_space_reserved_for_target_registers)
7298 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7299 if ((! call_really_used_regs[reg] || interrupt_handler)
7300 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7302 SET_HARD_REG_BIT (*live_regs_mask, reg);
7303 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7305 /* If this is an interrupt handler, we don't have any call-clobbered
7306 registers we can conveniently use for target register save/restore.
7307 Make sure we save at least one general purpose register when we need
7308 to save target registers. */
7309 if (interrupt_handler
7310 && hard_reg_set_intersect_p (*live_regs_mask,
7311 reg_class_contents[TARGET_REGS])
7312 && ! hard_reg_set_intersect_p (*live_regs_mask,
7313 reg_class_contents[GENERAL_REGS]))
7315 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7316 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7322 /* Code to generate prologue and epilogue sequences */
7324 /* PUSHED is the number of bytes that are being pushed on the
7325 stack for register saves. Return the frame size, padded
7326 appropriately so that the stack stays properly aligned. */
/* The unpadded size is get_frame_size (), plus crtl->outgoing_args_size
   when ACCUMULATE_OUTGOING_ARGS is nonzero.  The padding is chosen so that
   the returned size plus PUSHED is a multiple of the stack alignment in
   bytes (STACK_BOUNDARY / BITS_PER_UNIT); PUSHED itself is not part of the
   returned value.  */
7327 static HOST_WIDE_INT
7328 rounded_frame_size (int pushed)
7330 HOST_WIDE_INT size = get_frame_size ();
7331 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7333 if (ACCUMULATE_OUTGOING_ARGS)
7334 size += crtl->outgoing_args_size;
/* Round SIZE + PUSHED up to a multiple of ALIGN, then take PUSHED back
   out.  The mask with -ALIGN only rounds correctly when ALIGN is a power
   of two -- presumably always true for STACK_BOUNDARY; TODO confirm.  */
7336 return ((size + pushed + align - 1) & -align) - pushed;
7339 /* Choose a call-clobbered target-branch register that remains
7340 unchanged along the whole function. We set it up as the return
7341 value in the prologue. */
/* Takes no arguments.  Candidates are the registers from FIRST_TARGET_REG
   (skipping the first one when TR0_USED is set) through LAST_TARGET_REG; a
   candidate qualifies when call_really_used_regs marks it call-clobbered
   and df_regs_ever_live_p reports that it has never been live.
   NOTE(review): the return type and the statements guarded by the tests
   below are not visible in this listing.  Callers store the result in an
   int, so presumably the chosen register number is returned, with some
   sentinel when the function is not a leaf, is an interrupt handler, or no
   register qualifies -- confirm against the full source.  */
7343 sh_media_register_for_return (void)
7348 if (! crtl->is_leaf)
7350 if (lookup_attribute ("interrupt_handler",
7351 DECL_ATTRIBUTES (current_function_decl)))
7353 if (sh_cfun_interrupt_handler_p ())
/* TR0_USED is 1 when flag_pic is set and PIC_OFFSET_TABLE_REGNUM has ever
   been live; the scan below then starts one register later.  */
7356 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7358 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7359 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7365 /* The maximum registers we need to save are:
7366 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7367 - 32 floating point registers (for each pair, we save none,
7368 one single precision value, or a double precision value).
7369 - 8 target registers
7370 - add 1 entry for a delimiter. */
/* NOTE(review): the value below is the sum of the three register counts
   only; it does not include the delimiter entry mentioned above.  The
   entries array in save_schedule_s is declared with MAX_SAVED_REGS + 2
   elements, which is where the delimiter space is reserved.  */
7371 #define MAX_SAVED_REGS (62+32+8)
/* One entry of a register save schedule.  Code in this file reads and
   writes the reg, mode and offset members of an entry.
   NOTE(review): the member declarations are not visible in this
   listing.  */
7373 typedef struct save_entry_s
7382 /* There will be a delimiter entry with VOIDmode both at the start and the
7383 end of a filled in schedule. The end delimiter has the offset of the
7384 save with the smallest (i.e. most negative) offset. */
/* ENTRIES has room for MAX_SAVED_REGS saves plus the two delimiters.
   TEMPS holds the register numbers collected by sh5_schedule_saves for use
   as temporaries: at most MAX_TEMPS of them, followed by a -1
   terminator.  */
7385 typedef struct save_schedule_s
7387 save_entry entries[MAX_SAVED_REGS + 2];
7388 int temps[MAX_TEMPS+1];
/* Summary of the visible behaviour of sh5_schedule_saves:
   - unless current_function_interrupt is set, SCHEDULE->temps is first
     filled with up to MAX_TEMPS general registers that are call-clobbered,
     not fixed, and are not PR_MEDIA_REG, an argument register,
     FIRST_RET_REG, the static chain register (when the function has a
     static chain decl), or an EH return stack-adjust / data register (when
     the function calls eh_return);
   - the first entry is a VOIDmode delimiter carrying OFFSET_BASE;
   - live registers are then visited from the highest register number down,
     in two passes (ALIGN == 1, then ALIGN == 0), and the running offset is
     decreased by the size of the mode of each scheduled register;
   - in an interrupt function, general registers are added to the temps
     list while space remains, and live target registers are scheduled as
     DImode at the end of the first pass;
   - a closing VOIDmode delimiter receives the final offset and the temps
     list is terminated with -1.
   NOTE(review): the visible callers pass three arguments (mask, schedule,
   offset base), so the mention of RESTORE in the comment below looks
   stale.  Several original lines (including the end of the parameter list
   and the statements that store entry->reg and advance ENTRY) are not
   visible in this listing, and the comment below is missing its closing
   line.  */
7391 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7392 use reverse order. Returns the last entry written to (not counting
7393 the delimiter). OFFSET_BASE is a number to be added to all offset
7396 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7400 save_entry *entry = schedule->entries;
7404 if (! current_function_interrupt)
7405 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7406 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7407 && ! FUNCTION_ARG_REGNO_P (i)
7408 && i != FIRST_RET_REG
7409 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7410 && ! (crtl->calls_eh_return
7411 && (i == EH_RETURN_STACKADJ_REGNO
7412 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7413 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7414 schedule->temps[tmpx++] = i;
7416 entry->mode = VOIDmode;
7417 entry->offset = offset_base;
7419 /* We loop twice: first, we save 8-byte aligned registers in the
7420 higher addresses, that are known to be aligned. Then, we
7421 proceed to saving 32-bit registers that don't need 8-byte
7423 If this is an interrupt function, all registers that need saving
7424 need to be saved in full. Moreover, we need to postpone saving
7425 target registers till we have saved some general purpose registers
7426 we can then use as scratch registers. */
7427 offset = offset_base;
7428 for (align = 1; align >= 0; align--)
7430 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7431 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7433 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7436 if (current_function_interrupt)
7438 if (TARGET_REGISTER_P (i))
7440 if (GENERAL_REGISTER_P (i))
7443 if (mode == SFmode && (i % 2) == 1
7444 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7445 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7452 /* If we're doing the aligned pass and this is not aligned,
7453 or we're doing the unaligned pass and this is aligned,
7455 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7459 if (current_function_interrupt
7460 && GENERAL_REGISTER_P (i)
7461 && tmpx < MAX_TEMPS)
7462 schedule->temps[tmpx++] = i;
7464 offset -= GET_MODE_SIZE (mode);
7467 entry->offset = offset;
7470 if (align && current_function_interrupt)
7471 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7472 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7474 offset -= GET_MODE_SIZE (DImode);
7476 entry->mode = DImode;
7477 entry->offset = offset;
7482 entry->mode = VOIDmode;
7483 entry->offset = offset;
7484 schedule->temps[tmpx] = -1;
7488 /* Expand code for the function prologue. */
/* Takes no arguments; emits the prologue insns and updates per-function
   state.  Visible steps, in order:
   - record in current_function_interrupt whether the function is an
     interrupt handler;
   - adjust the stack for pretend args and crtl->args.info.stack_regs;
   - for SHcompact functions with a call cookie, keep the incoming argument
     registers alive and pass the cookie through R0 into MACH; for SHmedia,
     copy PR_MEDIA_REG into the register picked by
     sh_media_register_for_return;
   - handle the TARGET_VARARGS_PRETEND_ARGS argument registers, accounting
     for each one in STACK_USAGE;
   - switch stacks when the sp_switch attribute is present;
   - compute the live register set with calc_live_regs and save the
     registers, either by following an sh5_schedule_saves schedule or via
     push_regs;
   - load the PIC register when it is live, restore target_flags, allocate
     the local frame and set up the frame pointer when one is needed;
   - emit a blockage when profiling, exceptions or unwind tables are
     enabled, and publish STACK_USAGE when flag_stack_usage_info is set.
   NOTE(review): a number of original lines (braces, declarations, some
   conditions and statements) are not visible in this listing, so the exact
   nesting of the steps above should be confirmed against the full
   source.  */
7490 sh_expand_prologue (void)
7492 HARD_REG_SET live_regs_mask;
7495 int save_flags = target_flags;
7499 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7501 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7503 /* We have pretend args if we had an object sent partially in registers
7504 and partially on the stack, e.g. a large structure. */
7505 pretend_args = crtl->args.pretend_args_size;
7506 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7507 && (NPARM_REGS(SImode)
7508 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7511 output_stack_adjust (-pretend_args
7512 - crtl->args.info.stack_regs * 8,
7513 stack_pointer_rtx, 0, NULL, true);
7514 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7516 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7517 /* We're going to use the PIC register to load the address of the
7518 incoming-argument decoder and/or of the return trampoline from
7519 the GOT, so make sure the PIC register is preserved and
7521 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7523 if (TARGET_SHCOMPACT
7524 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7528 /* First, make all registers with incoming arguments that will
7529 be pushed onto the stack live, so that register renaming
7530 doesn't overwrite them. */
7531 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7532 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7533 >= NPARM_REGS (SImode) - reg)
7534 for (; reg < NPARM_REGS (SImode); reg++)
7535 emit_insn (gen_shcompact_preserve_incoming_args
7536 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7537 else if (CALL_COOKIE_INT_REG_GET
7538 (crtl->args.info.call_cookie, reg) == 1)
7539 emit_insn (gen_shcompact_preserve_incoming_args
7540 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7542 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7544 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7545 GEN_INT (crtl->args.info.call_cookie));
7546 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7547 gen_rtx_REG (SImode, R0_REG));
7549 else if (TARGET_SHMEDIA)
7551 int tr = sh_media_register_for_return ();
7554 emit_move_insn (gen_rtx_REG (DImode, tr),
7555 gen_rtx_REG (DImode, PR_MEDIA_REG));
7558 /* Emit the code for SETUP_VARARGS. */
7561 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7563 /* Push arg regs as if they'd been provided by caller in stack. */
7564 for (i = 0; i < NPARM_REGS(SImode); i++)
7566 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7568 if (i >= (NPARM_REGS(SImode)
7569 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7573 stack_usage += GET_MODE_SIZE (SImode);
7578 /* If we're supposed to switch stacks at function entry, do so now. */
7582 /* The argument specifies a variable holding the address of the
7583 stack the interrupt function should switch to/from at entry/exit. */
7584 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7586 = ggc_strdup (TREE_STRING_POINTER (arg));
7587 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7589 lab = add_constant (sp_switch, SImode, 0);
7590 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7592 emit_insn (gen_sp_switch_1 (newsrc));
/* D receives the number of bytes needed for the register saves.
   calc_live_regs can also clear MASK_FPU_SINGLE in target_flags, which is
   why target_flags is compared against SAVE_FLAGS below.  */
7595 d = calc_live_regs (&live_regs_mask);
7596 /* ??? Maybe we could save some switching if we can move a mode switch
7597 that already happens to be at the function start into the prologue. */
7598 if (target_flags != save_flags && ! current_function_interrupt)
7599 emit_insn (gen_toggle_sz ());
7603 int offset_base, offset;
7605 int offset_in_r0 = -1;
7607 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7608 int total_size, save_size;
7609 save_schedule schedule;
7613 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7614 && ! current_function_interrupt)
7615 r0 = gen_rtx_REG (Pmode, R0_REG);
7617 /* D is the actual number of bytes that we need for saving registers,
7618 however, in initial_elimination_offset we have committed to using
7619 an additional TREGS_SPACE amount of bytes - in order to keep both
7620 addresses to arguments supplied by the caller and local variables
7621 valid, we must keep this gap. Place it between the incoming
7622 arguments and the actually saved registers in a bid to optimize
7623 locality of reference. */
7624 total_size = d + tregs_space;
7625 total_size += rounded_frame_size (total_size);
7626 save_size = total_size - rounded_frame_size (d);
7627 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7628 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7629 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7631 /* If adjusting the stack in a single step costs nothing extra, do so.
7632 I.e. either if a single addi is enough, or we need a movi anyway,
7633 and we don't exceed the maximum offset range (the test for the
7634 latter is conservative for simplicity). */
7636 && (CONST_OK_FOR_I10 (-total_size)
7637 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7638 && total_size <= 2044)))
7639 d_rounding = total_size - save_size;
7641 offset_base = d + d_rounding;
7643 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7645 stack_usage += save_size + d_rounding;
7647 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7648 tmp_pnt = schedule.temps;
7649 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7651 enum machine_mode mode = (enum machine_mode) entry->mode;
7652 unsigned int reg = entry->reg;
7653 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7656 offset = entry->offset;
7658 reg_rtx = gen_rtx_REG (mode, reg);
7660 mem_rtx = gen_frame_mem (mode,
7661 gen_rtx_PLUS (Pmode,
7665 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7671 if (HAVE_PRE_DECREMENT
7672 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7673 || mem_rtx == NULL_RTX
7674 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7676 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7678 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7683 offset += GET_MODE_SIZE (mode);
7687 if (mem_rtx != NULL_RTX)
7690 if (offset_in_r0 == -1)
7692 emit_move_insn (r0, GEN_INT (offset));
7693 offset_in_r0 = offset;
7695 else if (offset != offset_in_r0)
7700 GEN_INT (offset - offset_in_r0)));
7701 offset_in_r0 += offset - offset_in_r0;
7704 if (pre_dec != NULL_RTX)
7710 (Pmode, r0, stack_pointer_rtx));
7714 offset -= GET_MODE_SIZE (mode);
7715 offset_in_r0 -= GET_MODE_SIZE (mode);
7720 mem_rtx = gen_frame_mem (mode, r0);
7722 mem_rtx = gen_frame_mem (mode,
7723 gen_rtx_PLUS (Pmode,
7727 /* We must not use an r0-based address for target-branch
7728 registers or for special registers without pre-dec
7729 memory addresses, since we store their values in r0
7731 gcc_assert (!TARGET_REGISTER_P (reg)
7732 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7733 || mem_rtx == pre_dec));
7736 orig_reg_rtx = reg_rtx;
7737 if (TARGET_REGISTER_P (reg)
7738 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7739 && mem_rtx != pre_dec))
7741 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7743 emit_move_insn (tmp_reg, reg_rtx);
7745 if (REGNO (tmp_reg) == R0_REG)
7749 gcc_assert (!refers_to_regno_p
7750 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7753 if (*++tmp_pnt <= 0)
7754 tmp_pnt = schedule.temps;
7761 /* Mark as interesting for dwarf cfi generator */
7762 insn = emit_move_insn (mem_rtx, reg_rtx);
7763 RTX_FRAME_RELATED_P (insn) = 1;
7764 /* If we use an intermediate register for the save, we can't
7765 describe this exactly in cfi as a copy of the to-be-saved
7766 register into the temporary register and then the temporary
7767 register on the stack, because the temporary register can
7768 have a different natural size than the to-be-saved register.
7769 Thus, we gloss over the intermediate copy and pretend we do
7770 a direct save from the to-be-saved register. */
7771 if (REGNO (reg_rtx) != reg)
7775 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7776 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7779 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7781 rtx reg_rtx = gen_rtx_REG (mode, reg);
7783 rtx mem_rtx = gen_frame_mem (mode,
7784 gen_rtx_PLUS (Pmode,
7788 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7789 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7794 gcc_assert (entry->offset == d_rounding);
7798 push_regs (&live_regs_mask, current_function_interrupt);
7802 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7803 emit_insn (gen_GOTaddr2picreg ());
7805 if (SHMEDIA_REGS_STACK_ADJUST ())
7807 /* This must NOT go through the PLT, otherwise mach and macl
7808 may be clobbered. */
7809 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7811 ? "__GCC_push_shmedia_regs"
7812 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7813 emit_insn (gen_shmedia_save_restore_regs_compact
7814 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7817 if (target_flags != save_flags && ! current_function_interrupt)
7818 emit_insn (gen_toggle_sz ());
/* Put back the target_flags value captured on entry, undoing any change
   made by calc_live_regs.  */
7820 target_flags = save_flags;
7822 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7823 stack_pointer_rtx, 0, NULL, true);
7824 stack_usage += rounded_frame_size (d) - d_rounding;
7826 if (frame_pointer_needed)
7827 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7829 if (TARGET_SHCOMPACT
7830 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7832 /* This must NOT go through the PLT, otherwise mach and macl
7833 may be clobbered. */
7834 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7835 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7836 emit_insn (gen_shcompact_incoming_args ());
7839 /* If we are profiling, make sure no instructions are scheduled before
7840 the call to mcount. Similarly if some call instructions are swapped
7841 before frame related insns, it'll confuse the unwinder because
7842 currently SH has no unwind info for function epilogues. */
7843 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7844 emit_insn (gen_blockage ());
7846 if (flag_stack_usage_info)
7847 current_function_static_stack_size = stack_usage;
7850 /* Expand code for the function epilogue. */
/* SIBCALL_P presumably indicates that the epilogue is for a sibling call;
   in the visible code it only selects E (-1 when set, 1 otherwise), which
   is passed on to output_stack_adjust.
   Visible steps: recompute the live register set with calc_live_regs,
   release the local frame (through the hard frame pointer when one is in
   use), restore the saved registers -- following an sh5_schedule_saves
   schedule backwards, or, on the path marked ! TARGET_SH5, by walking the
   hard registers -- put back target_flags, release the register save area
   together with the pretend args, apply EH_RETURN_STACKADJ_RTX when the
   function calls eh_return, switch back from an sp_switch stack, and
   finally emit a use of PR when PR is in the live register mask.
   NOTE(review): a number of original lines (braces, declarations, the
   individual pop calls and some comment endings) are not visible in this
   listing; confirm the nesting against the full source.  */
7852 sh_expand_epilogue (bool sibcall_p)
7854 HARD_REG_SET live_regs_mask;
7858 int save_flags = target_flags;
7859 int frame_size, save_size;
7860 int fpscr_deferred = 0;
7861 int e = sibcall_p ? -1 : 1;
7863 d = calc_live_regs (&live_regs_mask);
7866 frame_size = rounded_frame_size (d);
7870 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7872 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7873 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7874 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7876 total_size = d + tregs_space;
7877 total_size += rounded_frame_size (total_size);
7878 save_size = total_size - frame_size;
7880 /* If adjusting the stack in a single step costs nothing extra, do so.
7881 I.e. either if a single addi is enough, or we need a movi anyway,
7882 and we don't exceed the maximum offset range (the test for the
7883 latter is conservative for simplicity). */
7885 && ! frame_pointer_needed
7886 && (CONST_OK_FOR_I10 (total_size)
7887 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7888 && total_size <= 2044)))
7889 d_rounding = frame_size;
7891 frame_size -= d_rounding;
7894 if (frame_pointer_needed)
7896 /* We must avoid scheduling the epilogue with previous basic blocks.
7897 See PR/18032 and PR/40313. */
7898 emit_insn (gen_blockage ());
7899 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7900 &live_regs_mask, true);
7902 /* We must avoid moving the stack pointer adjustment past code
7903 which reads from the local frame, else an interrupt could
7904 occur after the SP adjustment and clobber data in the local
7906 emit_insn (gen_blockage ());
7907 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7909 else if (frame_size)
7911 /* We must avoid moving the stack pointer adjustment past code
7912 which reads from the local frame, else an interrupt could
7913 occur after the SP adjustment and clobber data in the local
7915 emit_insn (gen_blockage ());
7916 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7917 &live_regs_mask, true);
7920 if (SHMEDIA_REGS_STACK_ADJUST ())
7922 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7924 ? "__GCC_pop_shmedia_regs"
7925 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7926 /* This must NOT go through the PLT, otherwise mach and macl
7927 may be clobbered. */
7928 emit_insn (gen_shmedia_save_restore_regs_compact
7929 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7932 /* Pop all the registers. */
7934 if (target_flags != save_flags && ! current_function_interrupt)
7935 emit_insn (gen_toggle_sz ());
7938 int offset_base, offset;
7939 int offset_in_r0 = -1;
7941 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7942 save_schedule schedule;
7946 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7947 offset_base = -entry[1].offset + d_rounding;
7948 tmp_pnt = schedule.temps;
7949 for (; entry->mode != VOIDmode; entry--)
7951 enum machine_mode mode = (enum machine_mode) entry->mode;
7952 int reg = entry->reg;
7953 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7955 offset = offset_base + entry->offset;
7956 reg_rtx = gen_rtx_REG (mode, reg);
7958 mem_rtx = gen_frame_mem (mode,
7959 gen_rtx_PLUS (Pmode,
7963 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7966 if (HAVE_POST_INCREMENT
7967 && (offset == offset_in_r0
7968 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7969 && mem_rtx == NULL_RTX)
7970 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7972 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7974 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7975 post_inc = NULL_RTX;
7980 if (mem_rtx != NULL_RTX)
7983 if (offset_in_r0 == -1)
7985 emit_move_insn (r0, GEN_INT (offset));
7986 offset_in_r0 = offset;
7988 else if (offset != offset_in_r0)
7993 GEN_INT (offset - offset_in_r0)));
7994 offset_in_r0 += offset - offset_in_r0;
7997 if (post_inc != NULL_RTX)
8003 (Pmode, r0, stack_pointer_rtx));
8009 offset_in_r0 += GET_MODE_SIZE (mode);
8012 mem_rtx = gen_frame_mem (mode, r0);
8014 mem_rtx = gen_frame_mem (mode,
8015 gen_rtx_PLUS (Pmode,
8019 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8020 || mem_rtx == post_inc);
8023 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8024 && mem_rtx != post_inc)
8026 emit_move_insn (r0, mem_rtx);
8029 else if (TARGET_REGISTER_P (reg))
8031 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8033 /* Give the scheduler a bit of freedom by using up to
8034 MAX_TEMPS registers in a round-robin fashion. */
8035 emit_move_insn (tmp_reg, mem_rtx);
8038 tmp_pnt = schedule.temps;
8041 emit_move_insn (reg_rtx, mem_rtx);
8044 gcc_assert (entry->offset + offset_base == d + d_rounding);
8046 else /* ! TARGET_SH5 */
8051 /* For an ISR with RESBANK attribute assigned, don't pop PR
8053 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8054 && !sh_cfun_resbank_handler_p ())
8056 if (!frame_pointer_needed)
8057 emit_insn (gen_blockage ());
8061 /* Banked registers are popped first to avoid being scheduled in the
8062 delay slot. RTE switches banks before the ds instruction. */
8063 if (current_function_interrupt)
8065 bool use_movml = false;
8069 unsigned int count = 0;
8071 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8072 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8077 /* Use movml when all banked registers are popped. */
8078 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8082 if (sh_cfun_resbank_handler_p ())
8086 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8088 /* We must avoid scheduling multiple load insn with another
8090 emit_insn (gen_blockage ());
8091 emit_insn (gen_movml_pop_banked (sp_reg));
8092 emit_insn (gen_blockage ());
8095 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8096 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8099 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8102 last_reg = FIRST_PSEUDO_REGISTER;
8104 for (i = 0; i < last_reg; i++)
8106 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8108 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8109 && hard_reg_set_intersect_p (live_regs_mask,
8110 reg_class_contents[DF_REGS]))
8112 /* For an ISR with RESBANK attribute assigned, don't pop
8113 following registers, R0-R14, MACH, MACL and GBR. */
8114 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8115 && ! (sh_cfun_resbank_handler_p ()
8116 && ((j >= FIRST_GENERAL_REG
8117 && j < LAST_GENERAL_REG)
8123 if (j == FIRST_FP_REG && fpscr_deferred)
8127 if (target_flags != save_flags && ! current_function_interrupt)
8128 emit_insn (gen_toggle_sz ());
8129 target_flags = save_flags;
8131 output_stack_adjust (crtl->args.pretend_args_size
8132 + save_size + d_rounding
8133 + crtl->args.info.stack_regs * 8,
8134 stack_pointer_rtx, e, NULL, true);
8136 if (crtl->calls_eh_return)
8137 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8138 EH_RETURN_STACKADJ_RTX));
8140 /* Switch back to the normal stack if necessary. */
8141 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8142 emit_insn (gen_sp_switch_2 ());
8144 /* Tell flow the insn that pops PR isn't dead. */
8145 /* PR_REG will never be live in SHmedia mode, and we don't need to
8146 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8147 by the return pattern. */
8148 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8149 emit_use (gen_rtx_REG (SImode, PR_REG));
8152 /* Emit code to change the current function's return address to RA.
8153 TMP is available as a scratch register, if needed. */
/* The return-address register is PR_MEDIA_REG for SHmedia and PR_REG
   otherwise.  When that register is not in the live register mask, RA is
   moved straight into it (or, on one path, into the register given by
   sh_media_register_for_return).  Otherwise the offset of its save slot is
   computed -- either from the sh5_schedule_saves schedule plus
   rounded_frame_size (d) and SHMEDIA_REGS_STACK_ADJUST (), or as just
   rounded_frame_size (d) -- added to the hard frame pointer or the stack
   pointer in TMP, and RA is stored through the resulting address.
   NOTE(review): the conditions selecting between these paths are not all
   visible in this listing.  */
8155 sh_set_return_address (rtx ra, rtx tmp)
8157 HARD_REG_SET live_regs_mask;
8159 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8162 d = calc_live_regs (&live_regs_mask);
8164 /* If pr_reg isn't live, we can set it (or the register given in
8165 sh_media_register_for_return) directly. */
8166 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8172 int rr_regno = sh_media_register_for_return ();
8177 rr = gen_rtx_REG (DImode, rr_regno);
8180 rr = gen_rtx_REG (SImode, pr_reg);
8182 emit_insn (GEN_MOV (rr, ra));
8183 /* Tell flow the register for return isn't dead. */
8191 save_schedule schedule;
8194 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8195 offset = entry[1].offset;
8196 for (; entry->mode != VOIDmode; entry--)
8197 if (entry->reg == pr_reg)
8200 /* We can't find pr register. */
8204 offset = entry->offset - offset;
8205 pr_offset = (rounded_frame_size (d) + offset
8206 + SHMEDIA_REGS_STACK_ADJUST ());
8209 pr_offset = rounded_frame_size (d);
8211 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8213 if (frame_pointer_needed)
8214 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8216 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8218 tmp = gen_frame_mem (Pmode, tmp);
8219 emit_insn (GEN_MOV (tmp, ra));
8220 /* Tell this store isn't dead. */
8224 /* Clear variables at function end. */
/* FILE and SIZE are both marked ATTRIBUTE_UNUSED.
   NOTE(review): the return type and the body are not visible in this
   listing, so which variables get cleared cannot be stated here.  */
8226 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8227 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* Save the unnamed argument registers into a register buffer and return
   the address of that buffer (XEXP (regbuf, 0)).
   N_INTREGS and N_FLOATREGS are the numbers of integer and SFmode argument
   registers left after the named arguments, clamped at zero.  In the
   buffer the float registers occupy the first
   N_FLOATREGS * UNITS_PER_WORD bytes and the integer registers follow.
   One visible path instead updates crtl->args.info.call_cookie and
   crtl->args.pretend_args_size; another reports an error when the target
   is none of SH2E, SH4 or SH5.
   NOTE(review): the return type, several declarations and some of the
   conditions selecting between these paths are not visible in this
   listing.  */
8232 sh_builtin_saveregs (void)
8234 /* First unnamed integer register. */
8235 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8236 /* Number of integer registers we need to save. */
8237 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8238 /* First unnamed SFmode float reg */
8239 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8240 /* Number of SFmode float regs to save. */
8241 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8244 alias_set_type alias_set;
8250 int pushregs = n_intregs;
8252 while (pushregs < NPARM_REGS (SImode) - 1
8253 && (CALL_COOKIE_INT_REG_GET
8254 (crtl->args.info.call_cookie,
8255 NPARM_REGS (SImode) - pushregs)
8258 crtl->args.info.call_cookie
8259 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8264 if (pushregs == NPARM_REGS (SImode))
8265 crtl->args.info.call_cookie
8266 |= (CALL_COOKIE_INT_REG (0, 1)
8267 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8269 crtl->args.info.call_cookie
8270 |= CALL_COOKIE_STACKSEQ (pushregs);
8272 crtl->args.pretend_args_size += 8 * n_intregs;
8274 if (TARGET_SHCOMPACT)
8278 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8280 error ("__builtin_saveregs not supported by this subtarget");
8287 /* Allocate block of memory for the regs. */
8288 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8289 Or can assign_stack_local accept a 0 SIZE argument? */
8290 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8293 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8294 else if (n_floatregs & 1)
/* Odd number of float registers: allocate one extra word and force the
   UNITS_PER_WORD bit of the buffer address on.  */
8298 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8299 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8300 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8301 regbuf = change_address (regbuf, BLKmode, addr);
8303 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
/* Allocate one extra word, then round the address plus 4 down to a
   multiple of 8.  */
8307 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8308 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8309 XEXP (regbuf, 0), 4));
8310 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8311 emit_insn (gen_andsi3 (addr, addr, mask));
8312 regbuf = change_address (regbuf, BLKmode, addr);
8315 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8316 alias_set = get_varargs_alias_set ();
8317 set_mem_alias_set (regbuf, alias_set);
8320 This is optimized to only save the regs that are necessary. Explicitly
8321 named args need not be saved. */
8323 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8324 adjust_address (regbuf, BLKmode,
8325 n_floatregs * UNITS_PER_WORD),
8329 /* Return the address of the regbuf. */
8330 return XEXP (regbuf, 0);
8333 This is optimized to only save the regs that are necessary. Explicitly
8334 named args need not be saved.
8335 We explicitly build a pointer to the buffer because it halves the insn
8336 count when not optimizing (otherwise the pointer is built for each reg
8338 We emit the moves in reverse order so that we can use predecrement. */
8340 fpregs = copy_to_mode_reg (Pmode,
8341 plus_constant (Pmode, XEXP (regbuf, 0),
8342 n_floatregs * UNITS_PER_WORD));
8343 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8346 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8348 emit_insn (gen_addsi3 (fpregs, fpregs,
8349 GEN_INT (-2 * UNITS_PER_WORD)));
8350 mem = change_address (regbuf, DFmode, fpregs);
8351 emit_move_insn (mem,
8352 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8354 regno = first_floatreg;
8357 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8358 mem = change_address (regbuf, SFmode, fpregs);
8359 emit_move_insn (mem,
8360 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8361 + regno - SH_REG_MSW_OFFSET));
8365 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8369 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8370 mem = change_address (regbuf, SFmode, fpregs);
8371 emit_move_insn (mem,
8372 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8375 /* Return the address of the regbuf. */
8376 return XEXP (regbuf, 0);
8379 /* Define the `__builtin_va_list' type for the ABI. */
/* Returns ptr_type_node for SH5, for targets that are neither SH2E nor
   SH4, for TARGET_HITACHI, and when sh_cfun_attr_renesas_p () holds.
   Otherwise builds a RECORD_TYPE whose type decl is named __va_list_tag,
   with five fields chained in this order: __va_next_o, __va_next_o_limit,
   __va_next_fp, __va_next_fp_limit, __va_next_stack, and lays the record
   out.
   NOTE(review): the return type, the field types passed to build_decl and
   the final return statement are not visible in this listing.  */
8381 sh_build_builtin_va_list (void)
8383 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8384 tree record, type_decl;
8386 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8387 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8388 return ptr_type_node;
8390 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8391 type_decl = build_decl (BUILTINS_LOCATION,
8392 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8394 f_next_o = build_decl (BUILTINS_LOCATION,
8395 FIELD_DECL, get_identifier ("__va_next_o"),
8397 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8399 get_identifier ("__va_next_o_limit"),
8401 f_next_fp = build_decl (BUILTINS_LOCATION,
8402 FIELD_DECL, get_identifier ("__va_next_fp"),
8404 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8406 get_identifier ("__va_next_fp_limit"),
8408 f_next_stack = build_decl (BUILTINS_LOCATION,
8409 FIELD_DECL, get_identifier ("__va_next_stack"),
8412 DECL_FIELD_CONTEXT (f_next_o) = record;
8413 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8414 DECL_FIELD_CONTEXT (f_next_fp) = record;
8415 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8416 DECL_FIELD_CONTEXT (f_next_stack) = record;
8418 TYPE_STUB_DECL (record) = type_decl;
8419 TYPE_NAME (record) = type_decl;
8420 TYPE_FIELDS (record) = f_next_o;
8421 DECL_CHAIN (f_next_o) = f_next_o_limit;
8422 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8423 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8424 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8426 layout_type (record);
8431 /* Implement `va_start' for varargs and stdarg.
   VALIST is the va_list object and NEXTARG the address of the first
   anonymous stack argument.  When the multi-field va_list is in use its
   fields are initialized as follows:
     next_fp       = address returned by expand_builtin_saveregs
     next_fp_limit = next_fp + UNITS_PER_WORD * nfp
     next_o        = next_fp_limit
     next_o_limit  = next_o + UNITS_PER_WORD * nint
     next_stack    = NEXTARG
   The other configurations use std_expand_builtin_va_start.  */
8433 sh_va_start (tree valist, rtx nextarg)
8435 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8436 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
/* NOTE(review): the condition guarding this first saveregs + standard
   va_start sequence should be confirmed against the full source.  */
8442 expand_builtin_saveregs ();
8443 std_expand_builtin_va_start (valist, nextarg);
/* Without SH2E / SH4, or with the Hitachi / Renesas ABI, va_list is
   handled by the generic expander.  */
8447 if ((! TARGET_SH2E && ! TARGET_SH4)
8448 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8450 std_expand_builtin_va_start (valist, nextarg);
/* Fetch the five FIELD_DECLs of the va_list record in chain order.  */
8454 f_next_o = TYPE_FIELDS (va_list_type_node);
8455 f_next_o_limit = DECL_CHAIN (f_next_o);
8456 f_next_fp = DECL_CHAIN (f_next_o_limit);
8457 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8458 f_next_stack = DECL_CHAIN (f_next_fp_limit);
/* Build a COMPONENT_REF into VALIST for each field.  */
8460 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8462 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8463 valist, f_next_o_limit, NULL_TREE);
8464 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8466 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8467 valist, f_next_fp_limit, NULL_TREE);
8468 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8469 valist, f_next_stack, NULL_TREE);
8471 /* Call __builtin_saveregs.  Its result becomes next_fp.  */
8472 u = make_tree (sizetype, expand_builtin_saveregs ());
8473 u = fold_convert (ptr_type_node, u);
8474 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8475 TREE_SIDE_EFFECTS (t) = 1;
8476 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* nfp starts as the float argument count recorded for this function.
   NOTE(review): confirm how nfp is adjusted before it is used below.  */
8478 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
/* next_fp_limit lies nfp words above next_fp.  */
8483 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8484 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8485 TREE_SIDE_EFFECTS (t) = 1;
8486 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* next_o starts where the float area ends.  */
8488 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8489 TREE_SIDE_EFFECTS (t) = 1;
8490 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* nint starts as the integer argument count recorded for this function.
   NOTE(review): confirm how nint is adjusted before it is used below.  */
8492 nint = crtl->args.info.arg_count[SH_ARG_INT];
/* next_o_limit lies nint words above next_o.  */
8497 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8498 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8499 TREE_SIDE_EFFECTS (t) = 1;
8500 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* next_stack is NEXTARG.  */
8502 u = make_tree (ptr_type_node, nextarg);
8503 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8504 TREE_SIDE_EFFECTS (t) = 1;
8505 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8508 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8509 member, return it.  The candidate MEMBER starts out as NULL_TREE and
   the fields of TYPE are visited in DECL_CHAIN order.  */
8511 find_sole_member (tree type)
8513 tree field, member = NULL_TREE;
8515 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
/* Entries of the chain that are not FIELD_DECLs are tested for first.  */
8517 if (TREE_CODE (field) != FIELD_DECL)
/* Fields without a DECL_SIZE are tested for next...  */
8519 if (!DECL_SIZE (field))
/* ...and so are fields whose size is the constant zero.  */
8521 if (integer_zerop (DECL_SIZE (field)))
8530 /* Implement `va_arg'.
   VALIST is the va_list object and TYPE the type of the argument to
   fetch.  Statements are appended to PRE_P; the float path below also
   appends one assignment to POST_P.
   With the multi-field va_list (not SH5, SH2E or SH4, not the Hitachi /
   Renesas ABI) the code first picks which field of VALIST to read
   through (next_fp, next_o or next_stack), stores the address of that
   field in ADDR, and then lets std_gimplify_va_arg_expr do the fetch.  */
8532 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8533 gimple_seq *post_p ATTRIBUTE_UNUSED)
8535 HOST_WIDE_INT size, rsize;
8536 tree tmp, pptr_type_node;
8537 tree addr, lab_over = NULL, result = NULL;
/* Void has no mode to query, so it is never treated as by-reference.  */
8541 if (!VOID_TYPE_P (type))
8542 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8544 pass_by_ref = false;
/* NOTE(review): presumably done only when pass_by_ref is set, so that a
   pointer is fetched instead of the object -- confirm.  */
8547 type = build_pointer_type (type);
/* RSIZE is SIZE rounded up to a multiple of UNITS_PER_WORD.  */
8549 size = int_size_in_bytes (type);
8550 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8551 pptr_type_node = build_pointer_type (ptr_type_node);
/* Same configuration test as in sh_build_builtin_va_list, inverted.  */
8553 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8554 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8556 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8557 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
/* Fetch the five FIELD_DECLs of the va_list record in chain order.  */
8562 f_next_o = TYPE_FIELDS (va_list_type_node);
8563 f_next_o_limit = DECL_CHAIN (f_next_o);
8564 f_next_fp = DECL_CHAIN (f_next_o_limit);
8565 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8566 f_next_stack = DECL_CHAIN (f_next_fp_limit);
/* Build a COMPONENT_REF into VALIST for each field.  */
8568 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8570 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8571 valist, f_next_o_limit, NULL_TREE);
8572 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8573 valist, f_next_fp, NULL_TREE);
8574 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8575 valist, f_next_fp_limit, NULL_TREE);
8576 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8577 valist, f_next_stack, NULL_TREE);
8579 /* Structures with a single member with a distinct mode are passed
8580 like their member. This is relevant if the latter has a REAL_TYPE
8581 or COMPLEX_TYPE type. */
8583 while (TREE_CODE (eff_type) == RECORD_TYPE
8584 && (member = find_sole_member (eff_type))
8585 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8586 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8587 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8589 tree field_type = TREE_TYPE (member);
/* Peel the wrapper only when it has the same mode as its member.  */
8591 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8592 eff_type = field_type;
/* Otherwise the record's alignment is expected to differ from what the
   member's mode would give.  */
8595 gcc_assert ((TYPE_ALIGN (eff_type)
8596 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8597 || (TYPE_ALIGN (eff_type)
8598 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
/* Decide whether the value is read through the float pointers.  With
   SH4 or SH2A double support, reals of up to 8 bytes and some complex
   reals qualify; otherwise only 4-byte reals do.  */
8603 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8605 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8606 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8607 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8612 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
/* ADDR will hold the address of the va_list field to read through.
   lab_false starts the fall-back code, lab_over joins the two paths.  */
8615 addr = create_tmp_var (pptr_type_node, NULL);
8616 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8617 lab_over = create_artificial_label (UNKNOWN_LOCATION);
/* From here on VALIST denotes the pointer that ADDR points at.  */
8619 valist = build_simple_mem_ref (addr);
/* Float path: work on a temporary copy of the selected pointer.  */
8623 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8625 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
/* addr = &next_fp; next_fp_tmp = *addr.  */
8627 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8628 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8630 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
/* CMP branches to lab_false when next_fp_tmp has reached the limit.  For
   values wider than 4 bytes that are not doubles the limit is lowered
   by size - 4.  */
8631 tmp = next_fp_limit;
8632 if (size > 4 && !is_double)
8633 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8634 tmp = build2 (GE_EXPR, boolean_type_node,
8635 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8636 cmp = build3 (COND_EXPR, void_type_node, tmp,
8637 build1 (GOTO_EXPR, void_type_node,
8638 unshare_expr (lab_false)), NULL_TREE);
/* NOTE(review): CMP is added to PRE_P here and again after the alignment
   step; confirm the conditions that select between the two sites.  */
8640 gimplify_and_add (cmp, pre_p);
/* Alignment step: next_fp_tmp += (next_fp_tmp & UNITS_PER_WORD).  */
8642 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8643 || (is_double || size == 16))
8645 tmp = fold_convert (sizetype, next_fp_tmp);
8646 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8647 size_int (UNITS_PER_WORD));
8648 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8649 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8652 gimplify_and_add (cmp, pre_p);
/* Special case for SCmode on little-endian SH4: the two halves are
   fetched separately, imaginary part first, and recombined.  */
8654 #ifdef FUNCTION_ARG_SCmode_WART
8655 if (TYPE_MODE (eff_type) == SCmode
8656 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8658 tree subtype = TREE_TYPE (eff_type);
8662 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8663 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8666 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8667 real = get_initialized_tmp_var (real, pre_p, NULL);
8669 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8670 if (type != eff_type)
8671 result = build1 (VIEW_CONVERT_EXPR, type, result);
8672 result = get_initialized_tmp_var (result, pre_p, NULL);
8674 #endif /* FUNCTION_ARG_SCmode_WART */
/* Float registers available: skip the fall-back code.  */
8676 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8677 gimplify_and_add (tmp, pre_p);
/* Fall-back: addr = &next_stack; next_fp_tmp = *addr.  */
8679 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8680 gimplify_and_add (tmp, pre_p);
8682 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8683 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8684 gimplify_assign (unshare_expr (next_fp_tmp),
8685 unshare_expr (valist), pre_p);
/* Write the advanced temporary back through ADDR in POST_P, and let the
   generic fetch below operate on the temporary.  */
8687 gimplify_assign (unshare_expr (valist),
8688 unshare_expr (next_fp_tmp), post_p);
8689 valist = next_fp_tmp;
/* Integer path: branch to lab_false when next_o + rsize would pass
   next_o_limit.  */
8693 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8694 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8695 unshare_expr (next_o_limit));
8696 tmp = build3 (COND_EXPR, void_type_node, tmp,
8697 build1 (GOTO_EXPR, void_type_node,
8698 unshare_expr (lab_false)),
8700 gimplify_and_add (tmp, pre_p);
/* Enough room: addr = &next_o.  */
8702 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8703 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8705 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8706 gimplify_and_add (tmp, pre_p);
8708 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8709 gimplify_and_add (tmp, pre_p);
/* Outside SH4 / SH2A a multiword value that did not fit sets next_o to
   next_o_limit, so the comparison above fails from now on.  */
8711 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8712 gimplify_assign (unshare_expr (next_o),
8713 unshare_expr (next_o_limit), pre_p);
/* Fall-back: addr = &next_stack.  */
8715 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8716 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8721 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8722 gimplify_and_add (tmp, pre_p);
8726 /* ??? In va-sh.h, there had been code to make values larger than
8727 size 8 indirect. This does not match the FUNCTION_ARG macros. */
/* The generic expander does the fetch through the selected pointer.  */
8729 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8732 gimplify_assign (result, tmp, pre_p);
8733 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8734 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8735 gimplify_and_add (tmp, pre_p);
/* NOTE(review): presumably applied only when pass_by_ref is set, to turn
   the fetched pointer back into the object -- confirm.  */
8741 result = build_va_arg_indirect_ref (result);
8746 /* 64-bit floating point memory transfers are paired single precision
8747 loads or stores.  So DWARF information needs fixing in little endian
8748 (unless PR=SZ=1 in FPSCR).
   REG is the register to describe.  The PARALLEL built below lists the
   two SFmode halves of a DFmode REG in the order regno + 1, regno.  */
8750 sh_dwarf_register_span (rtx reg)
8752 unsigned regno = REGNO (reg);
/* Only DFmode registers on a target that is not WORDS_BIG_ENDIAN get the
   PARALLEL below.  */
8754 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8758 gen_rtx_PARALLEL (VOIDmode,
8760 gen_rtx_REG (SFmode, regno + 1),
8761 gen_rtx_REG (SFmode, regno)));
/* Return the mode to which an argument or return value of TYPE and MODE
   is promoted for FUNTYPE.  When sh_promote_prototypes accepts FUNTYPE
   the result comes from promote_mode; otherwise the decision is left to
   default_promote_function_mode.  *PUNSIGNEDP is passed on to whichever
   routine is called.  */
8764 static enum machine_mode
8765 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8766 int *punsignedp, const_tree funtype,
8769 if (sh_promote_prototypes (funtype))
8770 return promote_mode (type, mode, punsignedp);
8772 return default_promote_function_mode (type, mode, punsignedp, funtype,
/* Tell whether arguments of function type TYPE get promoted.  The final
   answer is true unless sh_attr_renesas_p holds for TYPE.
   NOTE(review): confirm whether earlier early-return cases precede the
   final return.  */
8777 sh_promote_prototypes (const_tree type)
8783 return ! sh_attr_renesas_p (type);
8786 /* Whether an argument must be passed by reference. On SHcompact, we
8787 pretend arguments wider than 32-bits that would have been passed in
8788 registers are passed by reference, so that an SHmedia trampoline
8789 loads them into the full 64-bits registers.
   CUM holds the argument counts so far; MODE, TYPE and NAMED describe
   the argument under consideration.  */
8791 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8792 const_tree type, bool named)
8794 unsigned HOST_WIDE_INT size;
/* SIZE comes from TYPE or from MODE.
   NOTE(review): presumably TYPE is used when it is available -- confirm.  */
8797 size = int_size_in_bytes (type);
8799 size = GET_MODE_SIZE (mode);
/* The argument must start in an integer argument register...  */
8801 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
/* ...either as an integer-class argument, or as a float-class one when
   the float argument registers are used up...  */
8803 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8804 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8805 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
/* ...and must neither be forced onto the stack nor be split between
   registers and stack.  */
8807 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8808 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
/* Decide whether the argument described by MODE, TYPE and NAMED is
   passed by reference.  CUM_V is the cumulative argument state; on
   SHcompact its byref member is updated as a side effect.  */
8815 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8816 const_tree type, bool named)
8818 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
/* Arguments that must live on the stack are checked for first.  */
8820 if (targetm.calls.must_pass_in_stack (mode, type))
8823 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8824 wants to know about pass-by-reference semantics for incoming
/* On SHcompact the answer is whatever shcompact_byref computes; the
   value is remembered in cum->byref and reported as nonzero / zero.  */
8829 if (TARGET_SHCOMPACT)
8831 cum->byref = shcompact_byref (cum, mode, type, named);
8832 return cum->byref != 0;
/* Return true when CUM describes outgoing arguments and the alignment of
   the argument is a multiple of SH_MIN_ALIGN_FOR_CALLEE_COPY.  The
   alignment is taken from TYPE for BLKmode and from MODE otherwise.
   NAMED is not used.  */
8839 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8840 const_tree type, bool named ATTRIBUTE_UNUSED)
8842 /* ??? How can it possibly be correct to return true only on the
8843 caller side of the equation? Is there someplace else in the
8844 sh backend that's magically producing the copies? */
8845 return (get_cumulative_args (cum)->outgoing
8846 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8847 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8850 /* Round a register number up to a proper boundary for an arg of mode
8852 The SH doesn't care about double alignment, so we only
8853 round doubles to even regs when asked to explicitly.
   CUM supplies the number of argument registers already used for the
   class of MODE.  That count is rounded up to the next even number when
   the unit size of MODE exceeds UNITS_PER_WORD and either
   TARGET_ALIGN_DOUBLE is set, or the target is SH4 / SH2A-double, MODE
   is DFmode or DCmode and float argument registers are still left.
   Otherwise the count is the result as it stands.  */
8855 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
8857 /* FIXME: This used to be a macro and has been copy pasted into this
8858 function as is. Make this more readable. */
8860 (((TARGET_ALIGN_DOUBLE
8861 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
8862 && (mode == DFmode || mode == DCmode)
8863 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
8864 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
/* Adding the low bit rounds an odd count up to the next even one.  */
8865 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
8866 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
8867 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
8870 /* Return true if arg of the specified mode should be passed in a register
8871 or false otherwise.  CUM is the argument state so far and TYPE the
   type of the argument.  */
8873 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
8876 /* FIXME: This used to be a macro and has been copy pasted into this
8877 function as is. Make this more readable. */
/* Type-based part: the type must not be addressable, and under the
   Hitachi / Renesas ABI aggregates and floats wider than SFmode are
   restricted further.  */
8880 || (! TREE_ADDRESSABLE (type)
8881 && (! (TARGET_HITACHI || cum.renesas_abi)
8882 || ! (AGGREGATE_TYPE_P (type)
8884 && (GET_MODE_CLASS (mode) == MODE_FLOAT
8885 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
/* Capacity part.  In the first alternative the whole argument has to
   fit: BLKmode values are measured in bytes against the integer
   argument registers, other modes in registers of their own class.  */
8888 ? ((mode) == BLKmode
8889 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
8890 + int_size_in_bytes (type))
8891 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
8892 : ((sh_round_reg (cum, mode)
8893 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
8894 <= NPARM_REGS (mode)))
/* In the second alternative it is enough that the argument starts in a
   register.  */
8895 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
/* Return the number of bytes of the argument described by MODE and TYPE
   that are passed in registers when the argument is split between
   registers and stack.  The result is the word count chosen below times
   UNITS_PER_WORD.  */
8899 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8900 tree type, bool named ATTRIBUTE_UNUSED)
8902 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
/* First case: the argument goes in registers, the target is neither SH4
   nor SH2A-double, and its end lies beyond the last argument register.
   The words passed in registers are the ones between the rounded start
   register and NPARM_REGS.  */
8906 && sh_pass_in_reg_p (*cum, mode, type)
8907 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8908 && (sh_round_reg (*cum, mode)
8910 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
8911 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8912 > NPARM_REGS (mode)))
8913 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
/* Second case: not SHcompact and SH5_WOULD_BE_PARTIAL_NREGS holds; the
   remaining integer argument registers are used.  */
8915 else if (!TARGET_SHCOMPACT
8916 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8917 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8919 return words * UNITS_PER_WORD;
8923 /* Define where to put the arguments to a function.
8924 Value is zero to push the argument on the stack,
8925 or a hard register in which to store the argument.
8927 MODE is the argument's machine mode.
8928 TYPE is the data type of the argument (as a tree).
8929 This is null for libcalls where that information may
8931 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8932 the preceding args and about the function being called.
8933 NAMED is nonzero if this argument is a named parameter
8934 (otherwise it is an extra parameter matching an ellipsis).
8936 On SH the first args are normally in registers
8937 and the rest are pushed. Any arg that starts within the first
8938 NPARM_REGS words is at least partially passed in a register unless
8939 its data type forbids. */
8941 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8942 const_tree type, bool named)
8944 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
/* Outside SH5 the VOIDmode query reports whether the Renesas ABI is in
   use, as the constant 1 or 0.  */
8946 if (! TARGET_SH5 && mode == VOIDmode)
8947 return GEN_INT (ca->renesas_abi ? 1 : 0);
/* Register case.  Under the Hitachi / Renesas ABI only named arguments
   qualify.  */
8950 && sh_pass_in_reg_p (*ca, mode, type)
8951 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
/* SCmode on little-endian SH4: return a PARALLEL of two SFmode
   registers whose numbers have their low bit flipped.  */
8955 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8956 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8958 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8959 gen_rtx_REG (SFmode,
8961 + (sh_round_reg (*ca, mode) ^ 1)),
8963 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8964 gen_rtx_REG (SFmode,
8966 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8968 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8971 /* If the alignment of a DF value causes an SF register to be
8972 skipped, we will use that skipped register for the next SF
8974 if ((TARGET_HITACHI || ca->renesas_abi)
8975 && ca->free_single_fp_reg
8977 return gen_rtx_REG (mode, ca->free_single_fp_reg);
/* General case: base register for MODE plus the rounded count.  For
   SFmode on little-endian SH4 outside the Hitachi / Renesas ABI the low
   bit of the register number is flipped.  */
8979 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8980 ^ (mode == SFmode && TARGET_SH4
8981 && TARGET_LITTLE_ENDIAN
8982 && ! TARGET_HITACHI && ! ca->renesas_abi);
8983 return gen_rtx_REG (mode, regno);
/* On SHcompact the VOIDmode query returns the accumulated call cookie.  */
8989 if (mode == VOIDmode && TARGET_SHCOMPACT)
8990 return GEN_INT (ca->call_cookie);
8992 /* The following test assumes unnamed arguments are promoted to
/* An SFmode argument first reuses a previously skipped single register.  */
8994 if (mode == SFmode && ca->free_single_fp_reg)
8995 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
/* Float-class arguments use the float argument registers while any are
   left, provided the argument is named or there is no prototype.  */
8997 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8998 && (named || ! ca->prototype_p)
8999 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9001 if (! ca->prototype_p && TARGET_SHMEDIA)
9002 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9004 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9006 + ca->arg_count[(int) SH_ARG_FLOAT]);
/* Otherwise use the next integer argument register, unless SHcompact
   forces the argument onto the stack or it would be split.  */
9009 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9010 && (! TARGET_SHCOMPACT
9011 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9012 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9015 return gen_rtx_REG (mode, (FIRST_PARM_REG
9016 + ca->arg_count[(int) SH_ARG_INT]));
9025 /* Update the data in CUM to advance over an argument
9026 of mode MODE and data type TYPE.
9027 (TYPE is null for libcalls where that information may not be
   The SH5 branch updates the integer / float register counts together
   with stack_regs, byref_regs, call_cookie and free_single_fp_reg; the
   code after it updates free_single_fp_reg and the register count of
   the class of MODE.  */
9030 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
9031 const_tree type, bool named)
9033 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9037 else if (TARGET_SH5)
/* TYPE2 / MODE2 describe what is actually accounted for; they differ
   from TYPE / MODE when the argument is passed by reference.  */
9039 const_tree type2 = (ca->byref && type
9042 enum machine_mode mode2 = (ca->byref && type
/* Size in 8-byte units, and how many of those still fit in the integer
   argument registers.  */
9045 int dwords = ((ca->byref
9048 ? int_size_in_bytes (type2)
9049 : GET_MODE_SIZE (mode2)) + 7) / 8;
9050 int numregs = MIN (dwords, NPARM_REGS (SImode)
9051 - ca->arg_count[(int) SH_ARG_INT]);
9055 ca->arg_count[(int) SH_ARG_INT] += numregs;
9056 if (TARGET_SHCOMPACT
9057 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9060 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9062 /* N.B. We want this also for outgoing. */
9063 ca->stack_regs += numregs;
9068 ca->stack_regs += numregs;
9069 ca->byref_regs += numregs;
9073 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9077 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
/* The argument does not fit entirely into the remaining registers.  */
9080 else if (dwords > numregs)
9082 int pushregs = numregs;
9084 if (TARGET_SHCOMPACT)
9085 ca->stack_regs += numregs;
9086 while (pushregs < NPARM_REGS (SImode) - 1
9087 && (CALL_COOKIE_INT_REG_GET
9089 NPARM_REGS (SImode) - pushregs)
9093 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9097 if (numregs == NPARM_REGS (SImode))
9099 |= CALL_COOKIE_INT_REG (0, 1)
9100 | CALL_COOKIE_STACKSEQ (numregs - 1);
9103 |= CALL_COOKIE_STACKSEQ (numregs);
/* Float-class arguments that are named, or belong to an unprototyped
   call, also consume float argument registers.  */
9106 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9107 && (named || ! ca->prototype_p))
/* An SFmode argument uses up a previously skipped single register.  */
9109 if (mode2 == SFmode && ca->free_single_fp_reg)
9110 ca->free_single_fp_reg = 0;
9111 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9112 < NPARM_REGS (SFmode))
/* Float registers are counted in pairs per 8 bytes of MODE2, capped at
   what is left.  */
9115 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9117 - ca->arg_count[(int) SH_ARG_FLOAT]);
9119 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9121 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9123 if (ca->outgoing && numregs > 0)
9127 |= (CALL_COOKIE_INT_REG
9128 (ca->arg_count[(int) SH_ARG_INT]
9129 - numregs + ((numfpregs - 2) / 2),
9130 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9133 while (numfpregs -= 2);
/* A named SFmode argument leaves the other half of its register pair
   free; remember it for the next SFmode argument.  */
9135 else if (mode2 == SFmode && (named)
9136 && (ca->arg_count[(int) SH_ARG_FLOAT]
9137 < NPARM_REGS (SFmode)))
9138 ca->free_single_fp_reg
9139 = FIRST_FP_PARM_REG - numfpregs
9140 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9146 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9148 /* Note that we've used the skipped register. */
9149 if (mode == SFmode && ca->free_single_fp_reg)
9151 ca->free_single_fp_reg = 0;
9154 /* When we have a DF after an SF, there's an SF register that gets
9155 skipped in order to align the DF value. We note this skipped
9156 register, because the next SF value will use it, and not the
9157 SF that follows the DF. */
9159 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9161 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9162 + BASE_ARG_REG (mode));
/* Advance the register count of the class of MODE past this argument.
   On SH4 / SH2A or under the Renesas ABI this is done only for
   arguments that are passed in registers.  */
9166 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9167 || sh_pass_in_reg_p (*ca, mode, type))
9168 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9169 = (sh_round_reg (*ca, mode)
9171 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9172 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9175 /* The Renesas calling convention doesn't quite fit into this scheme since
9176 the address is passed like an invisible argument, but one that is always
9177 passed in memory.
   FNDECL is the function concerned; INCOMING is not used.  Outside the
   Hitachi / Renesas case the result is hard register 2 in Pmode.  */
9179 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9181 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9183 return gen_rtx_REG (Pmode, 2);
9186 /* Worker function for TARGET_FUNCTION_VALUE.
9188 For the SH, this is like LIBCALL_VALUE, except that we must change the
9189 mode like PROMOTE_MODE does.
9190 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9191 tested here has to be kept in sync with the one in
9192 explow.c:promote_mode.
   VALTYPE is the type of the returned value and FN_DECL_OR_TYPE the
   function (decl or type) returning it; OUTGOING is not used.  */
9194 sh_function_value (const_tree valtype,
9195 const_tree fn_decl_or_type,
9196 bool outgoing ATTRIBUTE_UNUSED)
/* Something that is not a decl is dropped before it is handed to
   sh_promote_prototypes.  */
9199 && !DECL_P (fn_decl_or_type))
9200 fn_decl_or_type = NULL;
/* Integer-class modes narrower than 4 bytes of the listed scalar types
   are widened to SImode (DImode on SHmedia64) when prototypes promote;
   everything else keeps the mode of VALTYPE.  The register is the base
   return value register for the mode of VALTYPE.  */
9202 return gen_rtx_REG (
9203 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9204 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9205 && (TREE_CODE (valtype) == INTEGER_TYPE
9206 || TREE_CODE (valtype) == ENUMERAL_TYPE
9207 || TREE_CODE (valtype) == BOOLEAN_TYPE
9208 || TREE_CODE (valtype) == REAL_TYPE
9209 || TREE_CODE (valtype) == OFFSET_TYPE))
9210 && sh_promote_prototypes (fn_decl_or_type)
9211 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9212 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9215 /* Worker function for TARGET_LIBCALL_VALUE.
   A library call returns its MODE value in the base return value
   register for MODE.  FUN is not used.  */
9217 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9219 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9222 /* Return true if N is a possible register number of function value.
   That is FIRST_RET_REG always, and FIRST_FP_RET_REG when the target is
   SH2E or has the SHmedia FPU.  */
9224 sh_function_value_regno_p (const unsigned int regno)
9226 return ((regno) == FIRST_RET_REG
9227 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9228 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9231 /* Worker function for TARGET_RETURN_IN_MEMORY.
   TYPE is the type of the returned value and FNDECL the function
   returning it.  Two rules appear below: one based on a size limit of
   8 bytes, and one that returns BLKmode values in memory, as well as
   every RECORD_TYPE under the Hitachi / Renesas ABI.
   NOTE(review): confirm which target selects the size-based rule.  */
9233 sh_return_in_memory (const_tree type, const_tree fndecl)
/* Size-based rule: more than 8 bytes means memory.  The cast to unsigned
   makes a negative int_size_in_bytes result compare as a huge size.  */
9237 if (TYPE_MODE (type) == BLKmode)
9238 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9240 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
/* Mode-based rule.  */
9244 return (TYPE_MODE (type) == BLKmode
9245 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9246 && TREE_CODE (type) == RECORD_TYPE));
9250 /* We actually emit the code in sh_expand_prologue. We used to use
9251 a static variable to flag that we need to emit this code, but that
9252 doesn't work when inlining, when functions are deferred and then emitted
9253 later. Fortunately, we already have two flags that are part of struct
9254 function that tell if a function uses varargs or stdarg.
   CA, MODE and the type describe the last named argument.  When pretend
   arguments are used for the current function, *PRETEND_ARG_SIZE is set
   to 4 bytes per integer argument register left over after the named
   arguments.  SECOND_TIME is not used.  */
9256 sh_setup_incoming_varargs (cumulative_args_t ca,
9257 enum machine_mode mode,
9259 int *pretend_arg_size,
9260 int second_time ATTRIBUTE_UNUSED)
9262 gcc_assert (cfun->stdarg);
9263 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9265 int named_parm_regs, anon_parm_regs;
/* Registers taken by the named arguments: the rounded start register of
   the last one plus its size in words.  */
9267 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9269 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9270 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9271 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
/* *PRETEND_ARG_SIZE is left alone when no register remains.  */
9272 if (anon_parm_regs > 0)
9273 *pretend_arg_size = anon_parm_regs * 4;
/* Hook taking the cumulative argument state CA, which it does not use.
   NOTE(review): confirm the value this hook returns.  */
9278 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
/* Return true unless the Hitachi / Renesas ABI is in effect for CA_V or
   the target is SH5.  */
9284 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9286 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9288 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9292 /* Define the offset between two registers, one to be eliminated, and
9293 the other its replacement, at the start of a routine.
   FROM is the register being eliminated and TO its replacement.  The
   pairs handled are the argument pointer, the (soft) frame pointer and
   the return address pointer, each against the hard frame pointer or
   the stack pointer, plus hard frame pointer against stack pointer.  */
9295 initial_elimination_offset (int from, int to)
9298 int regs_saved_rounding = 0;
9299 int total_saved_regs_space;
9300 int total_auto_space;
/* target_flags is saved on entry and put back after the frame size has
   been computed; copy_flags keeps the value it had in between.  */
9301 int save_flags = target_flags;
9303 HARD_REG_SET live_regs_mask;
/* Size of the register save area, including the SHmedia adjustments.  */
9305 shmedia_space_reserved_for_target_registers = false;
9306 regs_saved = calc_live_regs (&live_regs_mask);
9307 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9309 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9311 shmedia_space_reserved_for_target_registers = true;
9312 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
/* On SH5 the save area is padded up to the stack boundary.  */
9315 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9316 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9317 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9319 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9320 copy_flags = target_flags;
9321 target_flags = save_flags;
9323 total_saved_regs_space = regs_saved + regs_saved_rounding;
/* Argument pointer: saved registers, locals and the by-reference
   register area (8 bytes per register) lie between it and either
   target.  */
9325 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9326 return total_saved_regs_space + total_auto_space
9327 + crtl->args.info.byref_regs * 8;
9329 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9330 return total_saved_regs_space + total_auto_space
9331 + crtl->args.info.byref_regs * 8;
9333 /* Initial gap between fp and sp is 0. */
9334 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
/* Soft frame pointer: the rounded frame size for zero pushed bytes.  */
9337 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9338 return rounded_frame_size (0);
9340 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9341 return rounded_frame_size (0);
/* Only the return address pointer remains.  */
9343 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9344 && (to == HARD_FRAME_POINTER_REGNUM
9345 || to == STACK_POINTER_REGNUM));
9348 int n = total_saved_regs_space;
9349 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9350 save_schedule schedule;
9353 n += total_auto_space;
9355 /* If it wasn't saved, there's not much we can do. */
9356 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
/* Recompute the save schedule under the flags in effect during the frame
   computation and look up the slot of the PR register.  */
9359 target_flags = copy_flags;
9361 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9362 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9363 if (entry->reg == pr_reg)
9365 target_flags = save_flags;
9366 return entry->offset;
9371 return total_auto_space;
9374 /* Parse the -mfixed-range= option string.
   CONST_STR is the option argument.  Every register of a range that is
   accepted is marked in both fixed_regs and call_used_regs; malformed
   input is reported with a warning.  */
9376 sh_fix_range (const char *const_str)
9379 char *str, *dash, *comma;
9381 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
9382 REG2 are either register names or register numbers. The effect
9383 of this option is to mark the registers in the range from REG1 to
9384 REG2 as ``fixed'' so they won't be used by the compiler. */
/* Work on a stack copy of the string, terminating NUL included.  */
9386 i = strlen (const_str);
9387 str = (char *) alloca (i + 1);
9388 memcpy (str, const_str, i + 1);
/* Each range needs a dash between its two register names.  */
9392 dash = strchr (str, '-');
9395 warning (0, "value of -mfixed-range must have form REG1-REG2");
/* A comma after the dash ends this range.  */
9399 comma = strchr (dash + 1, ',');
9403 first = decode_reg_name (str);
9406 warning (0, "unknown register name: %s", str);
9410 last = decode_reg_name (dash + 1);
9413 warning (0, "unknown register name: %s", dash + 1);
9421 warning (0, "%s-%s is an empty range", str, dash + 1);
/* The range is inclusive on both ends.  */
9425 for (i = first; i <= last; ++i)
9426 fixed_regs[i] = call_used_regs[i] = 1;
9436 /* Insert any deferred function attributes from earlier pragmas.
   NODE is the declaration being processed and *ATTRIBUTES its attribute
   list, which is replaced by the processed list.  The deferred list is
   emptied afterwards.  */
9438 sh_insert_attributes (tree node, tree *attributes)
/* Only function declarations are processed.  */
9442 if (TREE_CODE (node) != FUNCTION_DECL)
9445 /* We are only interested in fields. */
9449 /* Append the attributes to the deferred attributes. */
9450 *sh_deferred_function_attributes_tail = *attributes;
9451 attrs = sh_deferred_function_attributes;
9455 /* Some attributes imply or require the interrupt attribute. */
9456 if (!lookup_attribute ("interrupt_handler", attrs)
9457 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9459 /* If we have a trapa_handler, but no interrupt_handler attribute,
9460 insert an interrupt_handler attribute. */
9461 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9462 /* We can't use sh_pr_interrupt here because that's not in the
9465 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9466 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9467 if the interrupt attribute is missing, we ignore the attribute
9469 else if (lookup_attribute ("sp_switch", attrs)
9470 || lookup_attribute ("trap_exit", attrs)
9471 || lookup_attribute ("nosave_low_regs", attrs)
9472 || lookup_attribute ("resbank", attrs))
/* Rebuild the list into *ATTRIBUTES, warning about the attributes that
   need an interrupt function.  */
9476 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9478 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9479 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9480 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9481 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9482 warning (OPT_Wattributes,
9483 "%qE attribute only applies to interrupt functions",
9484 TREE_PURPOSE (attrs));
/* Attach a fresh list node at the tail and advance the tail.  */
9487 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9489 tail = &TREE_CHAIN (*tail);
9492 attrs = *attributes;
9496 /* Install the processed list. */
9497 *attributes = attrs;
9499 /* Clear deferred attributes. */
9500 sh_deferred_function_attributes = NULL_TREE;
9501 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9506 /*------------------------------------------------------------------------------
9507 Target specific attributes
9508 Supported attributes are:
9511 Specifies this function is an interrupt handler.
9514 Like interrupt_handler, but don't save all registers.
9517 Specifies an alternate stack for an interrupt handler to run on.
9520 Use a trapa to exit an interrupt function instead of rte.
9523 Don't save r0..r7 in an interrupt handler function.
9524 This is useful on SH3* and SH4*, which have a separate set of low
9525 regs for user and privileged modes.
9526 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9527 those that run with interrupts disabled and thus can't be
9528 interrupted themselves).
9531 Use Renesas calling/layout conventions (functions and structures).
9534 In case of an interrupt handler function, use a register bank to
9535 save registers R0-R14, MACH, MACL, GBR and PR.
9536 This is available only on SH2A targets.
9539 Declares a function to be called using the TBR relative addressing
9540 mode. Takes an argument that specifies the slot number in the table
9541 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
9544 /* Handle a 'resbank' attribute.
   *NODE is the entity the attribute is attached to and NAME the
   attribute name used in diagnostics.  ARGS and FLAGS are not used.
   *NO_ADD_ATTRS is set whenever the attribute is rejected with a
   warning.  */
9546 sh_handle_resbank_handler_attribute (tree * node, tree name,
9547 tree args ATTRIBUTE_UNUSED,
9548 int flags ATTRIBUTE_UNUSED,
9549 bool * no_add_attrs)
/* Rejected: the attribute is supported on SH2A only.  */
9553 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9555 *no_add_attrs = true;
/* Rejected: the attribute applies to function declarations only.  */
9557 if (TREE_CODE (*node) != FUNCTION_DECL)
9559 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9561 *no_add_attrs = true;
9567 /* Handle an "interrupt_handler" attribute; arguments as in
9568 struct attribute_spec.handler.
   The attribute is dropped with a warning on anything but a function
   declaration, and with an error on SHcompact.  */
9570 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9571 tree args ATTRIBUTE_UNUSED,
9572 int flags ATTRIBUTE_UNUSED,
9575 if (TREE_CODE (*node) != FUNCTION_DECL)
9577 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9579 *no_add_attrs = true;
9581 else if (TARGET_SHCOMPACT)
9583 error ("attribute interrupt_handler is not compatible with -m5-compact");
9584 *no_add_attrs = true;
9590 /* Handle a 'function_vector' attribute; arguments as in
9591 struct attribute_spec.handler.
   The checks run in order: SH2A target, function declaration, integer
   constant argument, argument value not above 255.  The first failing
   check warns and sets *NO_ADD_ATTRS.
   NOTE(review): ARGS is marked ATTRIBUTE_UNUSED although its value is
   read by the argument checks below.  */
9593 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9594 tree args ATTRIBUTE_UNUSED,
9595 int flags ATTRIBUTE_UNUSED,
9596 bool * no_add_attrs)
9600 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9602 *no_add_attrs = true;
9604 else if (TREE_CODE (*node) != FUNCTION_DECL)
9606 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9608 *no_add_attrs = true;
9610 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9612 /* The argument must be a constant integer. */
9613 warning (OPT_Wattributes,
9614 "%qE attribute argument not an integer constant",
9616 *no_add_attrs = true;
9618 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9620 /* The argument value must be between 0 and 255. */
9621 warning (OPT_Wattributes,
9622 "%qE attribute argument should be between 0 to 255",
9624 *no_add_attrs = true;
9629 /* Tests whether X refers to a function that has been assigned the
9630 'function_vector' attribute.  X must be a SYMBOL_REF carrying
   SYMBOL_FLAG_FUNCVEC_FUNCTION; its SYMBOL_REF_DECL is then checked
   with sh2a_function_vector_p.  The test is about the symbol X, not
   about the function currently being compiled.
   NOTE(review): the return statements are not visible in this
   excerpt. */
9632 sh2a_is_function_vector_call (rtx x)
9634 if (GET_CODE (x) == SYMBOL_REF
9635 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9637 tree tr = SYMBOL_REF_DECL (x);
9639 if (sh2a_function_vector_p (tr))
9646 /* Returns the function vector number, if the attribute
9647 'function_vector' is assigned, otherwise returns zero.
   Only a SYMBOL_REF X flagged SYMBOL_FLAG_FUNCVEC_FUNCTION is looked
   at.  The SH_ATTRIBUTES list of its decl (which must be a
   FUNCTION_DECL) is walked via TREE_CHAIN; for the entry whose
   TREE_PURPOSE is "function_vector" the number is read with
   TREE_INT_CST_LOW from the attribute's first argument. */
9649 sh2a_get_function_vector_number (rtx x)
9654 if ((GET_CODE (x) == SYMBOL_REF)
9655 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9657 t = SYMBOL_REF_DECL (x);
9659 if (TREE_CODE (t) != FUNCTION_DECL)
9662 list = SH_ATTRIBUTES (t);
9665 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9667 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9671 list = TREE_CHAIN (list);
9680 /* Handle an "sp_switch" attribute; arguments as in
9681 struct attribute_spec.handler.
   A -Wattributes warning is issued and *NO_ADD_ATTRS is set to true
   when *NODE is not a FUNCTION_DECL, or else when the first argument,
   TREE_VALUE (args), is not a STRING_CST.  FLAGS is marked unused. */
9683 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9684 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9686 if (TREE_CODE (*node) != FUNCTION_DECL)
9688 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9690 *no_add_attrs = true;
9692 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9694 /* The argument must be a constant string. */
9695 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9697 *no_add_attrs = true;
9703 /* Handle a "trap_exit" attribute; arguments as in
9704 struct attribute_spec.handler.
   A -Wattributes warning is issued and *NO_ADD_ATTRS is set to true
   when *NODE is not a FUNCTION_DECL, or else when the first argument,
   TREE_VALUE (args), is not an INTEGER_CST.  FLAGS is marked unused. */
9706 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9707 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9709 if (TREE_CODE (*node) != FUNCTION_DECL)
9711 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9713 *no_add_attrs = true;
9715 /* The argument specifies a trap number to be used in a trapa instruction
9716 at function exit (instead of an rte instruction). */
9717 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9719 /* The argument must be a constant integer. */
9720 warning (OPT_Wattributes, "%qE attribute argument not an "
9721 "integer constant", name);
9722 *no_add_attrs = true;
/* Attribute handler with the same parameter list as the other handlers
   in this file; every parameter is marked ATTRIBUTE_UNUSED.
   NOTE(review): the body is not visible in this excerpt -- presumably
   the attribute is accepted without any checks; confirm. */
9729 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9730 tree name ATTRIBUTE_UNUSED,
9731 tree args ATTRIBUTE_UNUSED,
9732 int flags ATTRIBUTE_UNUSED,
9733 bool *no_add_attrs ATTRIBUTE_UNUSED)
9738 /* True if __attribute__((renesas)) or -mrenesas.
   TD is tested against NULL_TREE and, after being replaced by
   TREE_TYPE (td), against error_mark_node before the "renesas"
   attribute is looked up in TYPE_ATTRIBUTES (td).
   NOTE(review): the -mrenesas test and the condition under which TD is
   replaced by its type are not visible in this excerpt. */
9740 sh_attr_renesas_p (const_tree td)
9744 if (td == NULL_TREE)
9747 td = TREE_TYPE (td);
9748 if (td == error_mark_node)
9750 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9754 /* True if __attribute__((renesas)) or -mrenesas, for the current function. */
/* Thin wrapper: applies sh_attr_renesas_p to current_function_decl. */
9757 sh_cfun_attr_renesas_p (void)
9759 return sh_attr_renesas_p (current_function_decl);
9762 /* Returns true if the current function has the "interrupt_handler" attribute set. */
/* Looks up "interrupt_handler" in the DECL_ATTRIBUTES of
   current_function_decl. */
9765 sh_cfun_interrupt_handler_p (void)
9767 return (lookup_attribute ("interrupt_handler",
9768 DECL_ATTRIBUTES (current_function_decl))
9772 /* Returns true if FUNC has been assigned the attribute
9773 "function_vector".
   FUNC is first checked to be a FUNCTION_DECL.  Its SH_ATTRIBUTES list
   is then walked via TREE_CHAIN, comparing each TREE_PURPOSE against
   "function_vector" with is_attribute_p. */
9775 sh2a_function_vector_p (tree func)
9778 if (TREE_CODE (func) != FUNCTION_DECL)
9781 list = SH_ATTRIBUTES (func);
9784 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9787 list = TREE_CHAIN (list);
9792 /* Returns true if the current function has the "resbank" attribute set
   and is also an "interrupt_handler", and the target is SH2A.  All
   three conditions are combined with &&; both attributes are looked up
   in the DECL_ATTRIBUTES of current_function_decl. */
9794 sh_cfun_resbank_handler_p (void)
9796 return ((lookup_attribute ("resbank",
9797 DECL_ATTRIBUTES (current_function_decl))
9799 && (lookup_attribute ("interrupt_handler",
9800 DECL_ATTRIBUTES (current_function_decl))
9801 != NULL_TREE) && TARGET_SH2A);
9804 /* Returns true if the current function has a "trap_exit" attribute set.
   The attribute is looked up in the DECL_ATTRIBUTES of
   current_function_decl. */
9806 sh_cfun_trap_exit_p (void)
9808 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9812 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS.
   OLD_FLAGS is XORed with the current target_flags and the differing
   bits are tested in three groups, in this order: the architecture /
   FPU masks, MASK_HITACHI (ABI) and MASK_LITTLE_ENDIAN.  A translated
   message is returned for the first group that differs.
   NOTE(review): the value returned when nothing differs is not visible
   in this excerpt. */
9814 sh_check_pch_target_flags (int old_flags)
9816 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9817 | MASK_SH_E | MASK_HARD_SH4
9818 | MASK_FPU_SINGLE | MASK_SH4))
9819 return _("created and used with different architectures / ABIs");
9820 if ((old_flags ^ target_flags) & MASK_HITACHI)
9821 return _("created and used with different ABIs");
9822 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9823 return _("created and used with different endianness");
9827 /* Predicates used by the templates. */
9829 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9830 Used only in general_movsrc_operand.
   MODE is marked unused.
   NOTE(review): the body is not visible in this excerpt. */
9832 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9844 /* Returns true if OP is a floating point value with value 0.0.
   OP's mode is compared against SFmode first.  The value is then
   extracted with REAL_VALUE_FROM_CONST_DOUBLE and must equal dconst0
   without being minus zero, so -0.0 is not accepted. */
9846 fp_zero_operand (rtx op)
9850 if (GET_MODE (op) != SFmode)
9853 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9854 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9857 /* Returns true if OP is a floating point value with value 1.0.
   OP's mode is compared against SFmode first.  The value is then
   extracted with REAL_VALUE_FROM_CONST_DOUBLE and compared with
   dconst1. */
9859 fp_one_operand (rtx op)
9863 if (GET_MODE (op) != SFmode)
9866 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9867 return REAL_VALUES_EQUAL (r, dconst1);
9870 /* In general mode switching is used. If we are
9871 compiling without -mfmovd, movsf_ie isn't taken into account for
9872 mode switching. We could check in machine_dependent_reorg for
9873 cases where we know we are in single precision mode, but there is
9874 no interface to find that out during reload, so we must avoid
9875 choosing an fldi alternative during reload and thus failing to
9876 allocate a scratch register for the constant loading. */
9883 /* Return the TLS type for TLS symbols.
   Anything that is not a SYMBOL_REF yields TLS_MODEL_NONE; for a
   SYMBOL_REF the result is its SYMBOL_REF_TLS_MODEL.  MODE is marked
   unused. */
9885 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9887 if (GET_CODE (op) != SYMBOL_REF)
9888 return TLS_MODEL_NONE;
9889 return SYMBOL_REF_TLS_MODEL (op);
9892 /* Return the destination address of a branch.
   SET_SRC is applied to BRANCH's pattern.  When the source is an
   IF_THEN_ELSE its operand 1 is used.  Operand 0 of the resulting
   expression is the destination insn, whose INSN_UID indexes
   INSN_ADDRESSES. */
9894 branch_dest (rtx branch)
9896 rtx dest = SET_SRC (PATTERN (branch));
9899 if (GET_CODE (dest) == IF_THEN_ELSE)
9900 dest = XEXP (dest, 1);
9901 dest = XEXP (dest, 0);
9902 dest_uid = INSN_UID (dest);
9903 return INSN_ADDRESSES (dest_uid);
9906 /* Return nonzero if REG is not used after INSN.
9907 We assume REG is a reload reg, and therefore does
9908 not live past labels. It may live past calls or jumps though.
   INSN itself is examined first, then every following insn in the
   chain is scanned in order.  For each one, a mention of REG in the
   source of its single_set is tested before a mention in the
   destination; a destination that is a MEM is treated differently from
   a register destination because REG then only appears in the address.
   NOTE(review): most of the return statements are not visible in this
   excerpt. */
9910 reg_unused_after (rtx reg, rtx insn)
9915 /* If the reg is set by this instruction, then it is safe for our
9916 case. Disregard the case where this is a store to memory, since
9917 we are checking a register used in the store address. */
9918 set = single_set (insn);
9919 if (set && !MEM_P (SET_DEST (set))
9920 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9923 while ((insn = NEXT_INSN (insn)))
9929 code = GET_CODE (insn);
9932 /* If this is a label that existed before reload, then the register
9933 is dead here. However, if this is a label added by reorg, then
9934 the register may still be live here. We can't tell the difference,
9935 so we just ignore labels completely.
   NOTE(review): a CODE_LABEL test still follows this comment; what it
   guards is not visible in this excerpt. */
9936 if (code == CODE_LABEL)
9941 if (code == JUMP_INSN)
9944 /* If this is a sequence, we must handle them all at once.
9945 We could have for instance a call that sets the target register,
9946 and an insn in a delay slot that uses the register. In this case,
9947 we must return 0. */
9948 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9953 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9955 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9956 rtx set = single_set (this_insn);
9958 if (CALL_P (this_insn))
9960 else if (JUMP_P (this_insn))
9962 if (INSN_ANNULLED_BRANCH_P (this_insn))
9967 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9969 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9971 if (!MEM_P (SET_DEST (set)))
9977 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9982 else if (code == JUMP_INSN)
9986 set = single_set (insn);
9987 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9989 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9990 return !MEM_P (SET_DEST (set));
9991 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9994 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
/* Accessor for the SImode REG rtx of T_REG, which is kept in the
   GTY-marked static t_reg_rtx.
   NOTE(review): the guard around the assignment and the return
   statement are not visible in this excerpt -- presumably the rtx is
   created on first use only; confirm. */
10002 static GTY(()) rtx t_reg_rtx;
10004 get_t_reg_rtx (void)
10007 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
/* Accessor for the PSImode REG rtx of FPSCR_REG, kept in the
   GTY-marked static fpscr_rtx.  On creation REG_USERVAR_P is set and
   the register is passed to mark_user_reg.  mark_user_reg is called
   again whenever reload has not completed or mdep_reorg_phase is not
   SH_AFTER_MDEP_REORG.
   NOTE(review): the guard around the creation code and the return
   statement are not visible in this excerpt. */
10011 static GTY(()) rtx fpscr_rtx;
10013 get_fpscr_rtx (void)
10017 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
10018 REG_USERVAR_P (fpscr_rtx) = 1;
10019 mark_user_reg (fpscr_rtx);
10021 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
10022 mark_user_reg (fpscr_rtx);
/* Emit a move into the FPSCR register from the entry at byte offset
   INDEX * 4 of the external array __fpscr_values.
   The VAR_DECL for the array is built when fpscr_values is still NULL:
   an artificial, ignored, external, static, public array of
   integer_type_node.  When pseudos cannot be created, the address of
   the array is moved into SCRATCH, an add of INDEX * 4 to SCRATCH is
   emitted (its guard, if any, is not visible in this excerpt) and the
   memory reference is rebuilt on SCRATCH with
   adjust_automodify_address.  Otherwise adjust_address is used and
   SCRATCH is not touched. */
10026 static GTY(()) tree fpscr_values;
10029 emit_fpu_switch (rtx scratch, int index)
10033 if (fpscr_values == NULL)
10037 t = build_index_type (integer_one_node);
10038 t = build_array_type (integer_type_node, t);
10039 t = build_decl (BUILTINS_LOCATION,
10040 VAR_DECL, get_identifier ("__fpscr_values"), t);
10041 DECL_ARTIFICIAL (t) = 1;
10042 DECL_IGNORED_P (t) = 1;
10043 DECL_EXTERNAL (t) = 1;
10044 TREE_STATIC (t) = 1;
10045 TREE_PUBLIC (t) = 1;
10051 src = DECL_RTL (fpscr_values);
10052 if (!can_create_pseudo_p ())
10054 emit_move_insn (scratch, XEXP (src, 0));
10056 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10057 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
10060 src = adjust_address (src, PSImode, index * 4);
10062 dst = get_fpscr_rtx ();
10063 emit_move_insn (dst, src);
/* Receives PAT, an insn pattern; expand_sf_unop and expand_sf_binop
   pass patterns whose last operand is the FPSCR rtx.
   NOTE(review): the body is not visible in this excerpt -- presumably
   it emits PAT; confirm. */
10067 emit_sf_insn (rtx pat)
/* Receives PAT, an insn pattern; expand_df_unop and expand_df_binop
   pass patterns whose last operand is the FPSCR rtx.
   NOTE(review): the body is not visible in this excerpt -- presumably
   it emits PAT; confirm. */
10073 emit_df_insn (rtx pat)
/* Call the generator FUN with OPERANDS[0], OPERANDS[1] and the FPSCR
   rtx, and hand the resulting pattern to emit_sf_insn. */
10079 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10081 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
/* Call the generator FUN with OPERANDS[0..2] and the FPSCR rtx, and
   hand the resulting pattern to emit_sf_insn. */
10085 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10087 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
10088 get_fpscr_rtx ()));
/* Call the generator FUN with OPERANDS[0], OPERANDS[1] and the FPSCR
   rtx, and hand the resulting pattern to emit_df_insn. */
10092 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10094 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
/* Call the generator FUN with OPERANDS[0..2] and the FPSCR rtx, and
   hand the resulting pattern to emit_df_insn. */
10098 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10100 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
10101 get_fpscr_rtx ()));
10104 static rtx get_free_reg (HARD_REG_SET);
10106 /* This function returns a register to use to load the address to load
10107 the fpscr from. Currently it always returns r1 or r7, but when we are
10108 able to use pseudo registers after combine, or have a better mechanism
10109 for choosing a register, it should be done here. */
10110 /* REGS_LIVE is the liveness information for the point for which we
10111 need this allocation. In some bare-bones exit blocks, r1 is live at the
10112 start. We can even have all of r0..r3 being live:
10113 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10114 INSN before which new insns are placed with will clobber the register
10115 we return. If a basic block consists only of setting the return value
10116 register to a pseudo and using that register, the return value is not
10117 live before or after this block, yet we'll insert our insns right in
the middle. */
/* Return a Pmode REG for hard register 1 when bit 1 of REGS_LIVE is
   clear; otherwise return one for hard register 7, after asserting
   that bit 7 of REGS_LIVE is clear. */
10120 get_free_reg (HARD_REG_SET regs_live)
10122 if (! TEST_HARD_REG_BIT (regs_live, 1))
10123 return gen_rtx_REG (Pmode, 1);
10125 /* Hard reg 1 is live; since this is a small register classes target,
10126 there shouldn't be anything but a jump before the function end. */
10127 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10128 return gen_rtx_REG (Pmode, 7);
10131 /* This function will set the fpscr from memory.
10132 MODE is the mode we are setting it to.
   MODE is cast to enum attr_fp_mode and compared with
   ACTUAL_NORMAL_MODE (FP_MODE); the result of that comparison is the
   index passed to emit_fpu_switch.  A scratch address register is
   obtained from get_free_reg (REGS_LIVE) only when pseudos cannot be
   created; otherwise NULL_RTX is passed. */
10134 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10136 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10137 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10140 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10141 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10144 /* Is the given character a logical line separator for the assembler?
   This fallback is used only when the macro is not already defined: it
   accepts ';' alone and does not look at STR. */
10145 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10146 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
/* Test whether the insn that follows INSN's predecessor in the chain,
   i.e. NEXT_INSN (PREV_INSN (insn)) rather than INSN itself, is an
   insn whose pattern is a SEQUENCE.
   NOTE(review): the checks applied to PREV and NEXT before they are
   used are not visible in this excerpt. */
10150 sequence_insn_p (rtx insn)
10152 rtx_insn *prev, *next;
10154 prev = PREV_INSN (insn);
10158 next = NEXT_INSN (prev);
10162 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
/* Compute a length adjustment for INSN.  Three situations are handled,
   in this order:
   - an insn that needs a delay slot and is not inside a SEQUENCE;
   - on SH2E, a conditional branch that is not inside a SEQUENCE;
   - an asm (ASM_INPUT or asm with operands) whose template is scanned
     statement by statement for sh-dsp parallel-processing insns and
     for the repeat pseudo-insn.
   NOTE(review): the adjustment values themselves are mostly not visible
   in this excerpt; the comments below give the intended byte counts. */
10166 sh_insn_length_adjustment (rtx_insn *insn)
10168 /* Instructions with unfilled delay slots take up an extra two bytes for
10169 the nop in the delay slot. */
10170 if (((NONJUMP_INSN_P (insn)
10171 && GET_CODE (PATTERN (insn)) != USE
10172 && GET_CODE (PATTERN (insn)) != CLOBBER)
10173 || CALL_P (insn) || JUMP_P (insn))
10174 && ! sequence_insn_p (insn)
10175 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10178 /* SH2e has a bug that prevents the use of annulled branches, so if
10179 the delay slot is not filled, we'll have to put a NOP in it. */
10180 if (sh_cpu_attr == CPU_SH2E
10182 && get_attr_type (insn) == TYPE_CBRANCH
10183 && ! sequence_insn_p (insn))
10186 /* sh-dsp parallel processing insn take four bytes instead of two. */
10188 if (NONJUMP_INSN_P (insn))
10191 rtx body = PATTERN (insn);
10194 bool maybe_label = true;
10196 if (GET_CODE (body) == ASM_INPUT)
10197 templ = XSTR (body, 0);
10198 else if (asm_noperands (body) >= 0)
10200 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10205 int ppi_adjust = 0;
10209 while (c == ' ' || c == '\t');
10210 /* all sh-dsp parallel-processing insns start with p.
10211 The only non-ppi sh insn starting with p is pref.
10212 The only ppi starting with pr is prnd. */
10213 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10215 /* The repeat pseudo-insn expands to three insns, a total of
10216 six bytes in size. */
10217 else if ((c == 'r' || c == 'R')
10218 && ! strncasecmp ("epeat", templ, 5))
10220 while (c && c != '\n'
10221 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10223 /* If this is a label, it is obviously not a ppi insn. */
10224 if (c == ':' && maybe_label)
10229 else if (c == '\'' || c == '"')
10230 maybe_label = false;
10234 maybe_label = c != ':';
10242 /* Return TRUE for a valid displacement for the REG+disp addressing mode. */
/* OP is inspected only when it is a CONST_INT.
   On SHmedia a VOIDmode access is checked with
   satisfies_constraint_I06; for any other mode the offset must have no
   bits of (size - 1) set and lie in [-512 * size, 512 * size), where
   size is GET_MODE_SIZE (mode).
   On other targets the offset must be non-negative, no larger than
   sh_max_mov_insn_displacement (MODE, CONSIDER_SH2A) and have none of
   the mov_insn_alignment_mask bits set.  A mode whose maximum
   displacement is below 1 is handled separately unless allow_zero is
   set (the declaration of that parameter is not visible in this
   excerpt). */
10245 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10248 if (! CONST_INT_P (op))
10251 if (TARGET_SHMEDIA)
10255 /* Check if this is the address of an unaligned load / store. */
10256 if (mode == VOIDmode)
10257 return satisfies_constraint_I06 (op);
10259 size = GET_MODE_SIZE (mode);
10260 return (!(INTVAL (op) & (size - 1))
10261 && INTVAL (op) >= -512 * size
10262 && INTVAL (op) < 512 * size);
10266 const HOST_WIDE_INT offset = INTVAL (op);
10267 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10268 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10270 /* If the mode does not support any displacement always return false.
10271 Even though an index of '0' is actually always valid, it will cause
10272 troubles when e.g. a DFmode move is split into two SFmode moves,
10273 where one SFmode move will have index '0' and the other move will
10275 if (!allow_zero && max_disp < 1)
10278 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10282 /* Recognize an RTL expression that is a valid memory address for
an instruction.
10284 The MODE argument is the machine mode for the MEM expression
10285 that wants to use this address. */
/* X is tested against these address forms for a MEM of mode MODE;
   STRICT is passed through to the MAYBE_*_REGISTER_RTX_P checks:
   - REG+REG is looked at first when ALLOW_INDEXED_ADDRESS is false;
   - the GBR register by itself;
   - a base register, or POST_INC / PRE_DEC of one (not on SHmedia);
   - PLUS forms (not considered for PSImode before reload has
     completed): GBR+displacement via gbr_displacement,
     base+displacement via sh_legitimate_index_p for modes of at most
     8 bytes, and base+index register pairs subject to the mode-size
     conditions below.
   NOTE(review): the result returned for each case is not visible in
   this excerpt. */
10294 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10296 if (! ALLOW_INDEXED_ADDRESS
10297 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10300 if (REG_P (x) && REGNO (x) == GBR_REG)
10303 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10305 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10306 && ! TARGET_SHMEDIA
10307 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10309 else if (GET_CODE (x) == PLUS
10310 && (mode != PSImode || reload_completed))
10312 rtx xop0 = XEXP (x, 0);
10313 rtx xop1 = XEXP (x, 1);
10315 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10316 return gbr_displacement (xop1, mode);
10318 if (GET_MODE_SIZE (mode) <= 8
10319 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10320 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10323 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10324 || ((xop0 == stack_pointer_rtx
10325 || xop0 == hard_frame_pointer_rtx)
10326 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10327 || ((xop1 == stack_pointer_rtx
10328 || xop1 == hard_frame_pointer_rtx)
10329 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10330 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10331 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10332 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10333 && TARGET_FMOVD && mode == DFmode)))
10335 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10336 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10338 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10339 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10347 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10348 isn't protected by a PIC unspec.
   SYMBOL_REF, LABEL_REF and PC are tested first, then CONST_DOUBLE,
   then the UNSPEC codes listed below.  Anything else is searched
   recursively: 'e' operands directly, and the elements of vector
   operands one by one. */
10350 nonpic_symbol_mentioned_p (rtx x)
10355 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10356 || GET_CODE (x) == PC)
10359 /* We don't want to look into the possible MEM location of a
10360 CONST_DOUBLE, since we're not going to use it, in general. */
10361 if (GET_CODE (x) == CONST_DOUBLE)
10364 if (GET_CODE (x) == UNSPEC
10365 && (XINT (x, 1) == UNSPEC_PIC
10366 || XINT (x, 1) == UNSPEC_GOT
10367 || XINT (x, 1) == UNSPEC_GOTOFF
10368 || XINT (x, 1) == UNSPEC_GOTPLT
10369 || XINT (x, 1) == UNSPEC_GOTTPOFF
10370 || XINT (x, 1) == UNSPEC_DTPOFF
10371 || XINT (x, 1) == UNSPEC_TPOFF
10372 || XINT (x, 1) == UNSPEC_PLT
10373 || XINT (x, 1) == UNSPEC_SYMOFF
10374 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10377 fmt = GET_RTX_FORMAT (GET_CODE (x));
10378 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10383 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10384 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10387 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10394 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10395 @GOTOFF in `reg'.
   TLS symbols are tested for first.  A LABEL_REF or a local SYMBOL_REF
   is loaded with gen_symGOTOFF2reg, any other SYMBOL_REF with
   gen_symGOT2reg.  In both cases a fresh Pmode pseudo is allocated when
   `reg' is NULL_RTX.  MODE is marked unused.
   NOTE(review): the return statements are not visible in this
   excerpt. */
10397 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10400 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10403 if (GET_CODE (orig) == LABEL_REF
10404 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10406 if (reg == NULL_RTX)
10407 reg = gen_reg_rtx (Pmode);
10409 emit_insn (gen_symGOTOFF2reg (reg, orig));
10412 else if (GET_CODE (orig) == SYMBOL_REF)
10414 if (reg == NULL_RTX)
10415 reg = gen_reg_rtx (Pmode);
10417 emit_insn (gen_symGOT2reg (reg, orig));
10423 /* Given a (logical) mode size and an offset in bytes, try to find the
10424 appropriate displacement value for a mov insn. On SH the displacements
10425 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10426 15 bytes in QImode. To compensate this we create a new base address by
10427 adding an adjustment value to it.
10429 If the originally requested offset is greater than 127 we prefer using
10430 values 124..127 over 128..131 to increase opportunities to use the
10433 In some cases it is possible that a requested offset might seem unaligned
10434 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10435 This is compensated by adjusting the base address so that the effective
10436 address of the displacement move insn will be aligned.
10438 This is not the best possible way of rebasing the base address, as it
10439 does not look at other present displacement addressings around it.
10440 In some cases this can create more base address adjustments than would
10441 actually be necessary. */
/* Split OFFSET for an access of mode MODE into a base adjustment and a
   remaining mov displacement.  The result starts out with both fields
   set to NULL_RTX; the fields are filled in only when the remaining
   displacement plus the mode size does not exceed
   max_disp + mov_insn_sz.  Callers test both fields against NULL_RTX
   before using them.  Displacement limits are queried with the SH2A
   large-displacement forms disabled (second argument false). */
10448 static struct disp_adjust
10449 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10451 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10453 /* Do not try to use SH2A's large displacements here, because this would
10454 effectively disable the small displacement insns. */
10455 const int mode_sz = GET_MODE_SIZE (mode);
10456 const int mov_insn_sz = mov_insn_size (mode, false);
10457 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10458 const int max_disp_next = max_disp + mov_insn_sz;
10459 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10460 HOST_WIDE_INT offset_adjust;
10462 /* In some cases this actually does happen and we must check for it. */
10463 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10466 /* Keeps the previous behavior for QImode displacement addressing.
10467 This just decides how the offset is re-based. Removing this special
10468 case will result in slightly bigger code on average, but it's not that
10470 if (mov_insn_sz == 1)
10471 align_modifier = 0;
10473 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10475 if (mode_sz + offset - offset_adjust <= max_disp_next)
10477 res.offset_adjust = GEN_INT (offset_adjust);
10478 res.mov_disp = GEN_INT (offset - offset_adjust);
10484 /* Try to modify an illegitimate address and make it legitimate.
10485 If we find one, return the new, valid address.
10486 Otherwise, return the original address.
   X may first be replaced by the result of legitimize_pic_address on
   OLDX (the guarding condition is not visible in this excerpt).
   SHmedia, DFmode on SH4 / SH2A-double and SFmode on SH2E are tested
   for before any displacement work is done.  For a base register plus
   CONST_INT, the offset is split with sh_find_mov_disp_adjust; when
   that yields a result, the base adjustment is added with expand_binop
   and the new address is that sum plus the remaining displacement. */
10488 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10491 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10493 if (TARGET_SHMEDIA)
10496 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10497 || (TARGET_SH2E && mode == SFmode))
10500 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10501 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10503 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10504 INTVAL (XEXP (x, 1)));
10506 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10508 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10509 adj.offset_adjust, NULL_RTX, 0,
10511 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10518 /* Attempt to replace *p, which is an address that needs reloading, with
10519 a valid memory address for an operand of mode MODE.
10520 Like for sh_legitimize_address, for the SH we try to get a normal form
10521 of the address. That will allow inheritance of the address reloads.
   Every case handled below pushes a BASE_REG_CLASS reload with
   push_reload for operand OPNUM:
   - REG+REG while indexed addressing is not allowed: the whole
     (copied) address is reloaded;
   - (PLUS (PLUS a b) c) while indexed addressing is not allowed: the
     address is rebuilt and the inner sum reloaded;
   - base register + CONST_INT: the whole address is reloaded for SH2A
     DFmode with an offset that is not a multiple of 8 and for SH2E
     SFmode; for 4- and 8-byte modes (except SH4 DFmode) the offset is
     split with sh_find_mov_disp_adjust and only the adjusted base is
     reloaded;
   - an address already in that split form is recognized again and its
     inner sum reloaded.
   NOTE(review): the last parameter (read below as `itype') and the
   return statements are not visible in this excerpt. */
10523 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10526 enum reload_type type = (enum reload_type) itype;
10527 const int mode_sz = GET_MODE_SIZE (mode);
10529 if (! ALLOW_INDEXED_ADDRESS
10530 && GET_CODE (*p) == PLUS
10531 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10533 *p = copy_rtx (*p);
10534 push_reload (*p, NULL_RTX, p, NULL,
10535 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10539 if (! ALLOW_INDEXED_ADDRESS
10540 && GET_CODE (*p) == PLUS
10541 && GET_CODE (XEXP (*p, 0)) == PLUS)
10543 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10544 XEXP (XEXP (*p, 0), 1));
10545 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10546 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10547 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10551 if (TARGET_SHMEDIA)
10554 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10555 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10556 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10557 && (ALLOW_INDEXED_ADDRESS
10558 || XEXP (*p, 0) == stack_pointer_rtx
10559 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10561 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10562 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10564 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10566 push_reload (*p, NULL_RTX, p, NULL,
10567 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10571 if (TARGET_SH2E && mode == SFmode)
10573 *p = copy_rtx (*p);
10574 push_reload (*p, NULL_RTX, p, NULL,
10575 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10579 /* FIXME: Do not allow to legitimize QImode and HImode displacement
10580 moves because then reload has a problem figuring the constraint
10581 that the move insn target/source reg must be R0.
10582 Or maybe some handling is wrong in sh_secondary_reload for this
10583 to work properly? */
10584 if ((mode_sz == 4 || mode_sz == 8)
10585 && ! (TARGET_SH4 && mode == DFmode)
10586 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10588 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10589 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10590 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10591 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10596 /* We must re-recognize what we created before. */
10597 if (GET_CODE (*p) == PLUS
10598 && (mode_sz == 4 || mode_sz == 8)
10599 && GET_CODE (XEXP (*p, 0)) == PLUS
10600 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10601 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10602 && CONST_INT_P (XEXP (*p, 1))
10603 && ! (TARGET_SH2E && mode == SFmode))
10605 /* Because this address is so complex, we know it must have
10606 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10607 it is already unshared, and needs no further unsharing. */
10608 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10609 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10616 /* In the name of slightly smaller debug output, and to cater to
10617 general assembler lossage, recognize various UNSPEC sequences
10618 and turn them back into a direct symbol reference.
   ORIG_X is first passed through delegitimize_mem_from_attrs.  For a
   CONST wrapping an UNSPEC:
   - UNSPEC_GOT, UNSPEC_GOTOFF and UNSPEC_SYMOFF yield the UNSPEC's
     first operand;
   - UNSPEC_PCREL_SYMOFF whose first operand is a CONST of an
     UNSPEC_PLT yields the operand of that UNSPEC_PLT;
   - on SHmedia, UNSPEC_EXTRACT_S16 / UNSPEC_EXTRACT_U16 are turned
     into a PLUS of the first two operands, put back into ORIG_X's MEM
     when ORIG_X is a MEM.
   NOTE(review): how X and Y are derived from ORIG_X, and the final
   return, are not visible in this excerpt. */
10620 sh_delegitimize_address (rtx orig_x)
10624 orig_x = delegitimize_mem_from_attrs (orig_x);
10629 if (GET_CODE (x) == CONST)
10632 if (GET_CODE (y) == UNSPEC)
10634 if (XINT (y, 1) == UNSPEC_GOT
10635 || XINT (y, 1) == UNSPEC_GOTOFF
10636 || XINT (y, 1) == UNSPEC_SYMOFF)
10637 return XVECEXP (y, 0, 0);
10638 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10640 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10642 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10644 if (GET_CODE (symplt) == UNSPEC
10645 && XINT (symplt, 1) == UNSPEC_PLT)
10646 return XVECEXP (symplt, 0, 0);
10649 else if (TARGET_SHMEDIA
10650 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10651 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10653 rtx offset = XVECEXP (y, 0, 1);
10655 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10656 if (MEM_P (orig_x))
10657 x = replace_equiv_address_nv (orig_x, x);
10666 /* Mark the use of a constant in the literal table. If the constant
10667 has multiple labels, make it unique.
   Two passes are visible.  The first walks backwards from X over
   preceding labels for as long as each label's LABEL_REFS equals the
   insn that follows it, then sets INSN_DELETED_P on every label on the
   LABEL_REFS chain of the label found.  The second walks forwards from
   X over insns whose pattern is an UNSPEC_VOLATILE, setting operand 1
   of UNSPECV_CONST2 / CONST4 / CONST8 patterns to const1_rtx, and
   compares the operand of UNSPECV_WINDOW_END with X.
   NOTE(review): the cases of the initial switch on GET_CODE (x), the
   loop exits and the return value are not visible in this excerpt. */
10669 mark_constant_pool_use (rtx x)
10671 rtx insn, lab, pattern;
10676 switch (GET_CODE (x))
10686 /* Get the first label in the list of labels for the same constant
10687 and delete the other labels in the list. */
10689 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10691 if (!LABEL_P (insn)
10692 || LABEL_REFS (insn) != NEXT_INSN (insn))
10697 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10698 INSN_DELETED_P (insn) = 1;
10700 /* Mark constants in a window. */
10701 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10703 if (!NONJUMP_INSN_P (insn))
10706 pattern = PATTERN (insn);
10707 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10710 switch (XINT (pattern, 1))
10712 case UNSPECV_CONST2:
10713 case UNSPECV_CONST4:
10714 case UNSPECV_CONST8:
10715 XVECEXP (pattern, 0, 1) = const1_rtx;
10717 case UNSPECV_WINDOW_END:
10718 if (XVECEXP (pattern, 0, 0) == x)
10721 case UNSPECV_CONST_END:
10731 /* Return true if it's possible to redirect BRANCH1 to the destination
10732 of an unconditional jump BRANCH2. We only want to do this if the
10733 resulting branch will have a short displacement.
   Nothing is attempted unless flag_expensive_optimizations is set and
   BRANCH2 is a simplejump.  DEST is the label operand of BRANCH2's
   single_set source.  Two scans follow, one stepping with PREV_INSN
   and one with NEXT_INSN; each accumulates get_attr_length and stops
   once 256 bytes are reached.  Both scans start at
   NEXT_INSN (branch1).
   NOTE(review): the comparison against DEST inside the loops and the
   return statements are not visible in this excerpt. */
10735 sh_can_redirect_branch (rtx branch1, rtx branch2)
10737 if (flag_expensive_optimizations && simplejump_p (branch2))
10739 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10743 for (distance = 0, insn = NEXT_INSN (branch1);
10744 insn && distance < 256;
10745 insn = PREV_INSN (insn))
10750 distance += get_attr_length (insn);
10752 for (distance = 0, insn = NEXT_INSN (branch1);
10753 insn && distance < 256;
10754 insn = NEXT_INSN (insn))
10759 distance += get_attr_length (insn);
10765 /* Return nonzero if register old_reg can be renamed to register new_reg.
   OLD_REG is marked unused.  The only visible test concerns interrupt
   handlers: NEW_REG is checked with df_regs_ever_live_p when
   sh_cfun_interrupt_handler_p holds. */
10767 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10768 unsigned int new_reg)
10770 /* Interrupt functions can only use registers that have already been
10771 saved by the prologue, even if they would normally be
10773 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10779 /* Function to update the integer COST
10780 based on the relationship between INSN that is dependent on
10781 DEP_INSN through the dependence LINK. The default is to make no
10782 adjustment to COST. This can be used for example to specify to
10783 the scheduler that an output- or anti-dependence does not incur
10784 the same cost as a data-dependence. The return value should be
10785 the new value for COST. */
10787 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10791 if (TARGET_SHMEDIA)
10793 /* On SHmedia, if the dependence is an anti-dependence or
10794 output-dependence, there is no cost. */
10795 if (REG_NOTE_KIND (link) != 0)
10797 /* However, dependencies between target register loads and
10798 uses of the register in a subsequent block that are separated
10799 by a conditional branch are not modelled - we have to do with
10800 the anti-dependency between the target register load and the
10801 conditional branch that ends the current block. */
10802 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10803 && GET_CODE (PATTERN (dep_insn)) == SET
10804 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10805 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10806 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10808 int orig_cost = cost;
10809 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10810 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10811 ? insn : JUMP_LABEL (insn));
10812 /* On the likely path, the branch costs 1, on the unlikely path,
10816 target = next_active_insn (target);
10817 while (target && ! flow_dependent_p (target, dep_insn)
10819 /* If two branches are executed in immediate succession, with the
10820 first branch properly predicted, this causes a stall at the
10821 second branch, hence we won't need the target for the
10822 second branch for two cycles after the launch of the first
10824 if (cost > orig_cost - 2)
10825 cost = orig_cost - 2;
10831 else if (get_attr_is_mac_media (insn)
10832 && get_attr_is_mac_media (dep_insn))
10835 else if (! reload_completed
10836 && GET_CODE (PATTERN (insn)) == SET
10837 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10838 && GET_CODE (PATTERN (dep_insn)) == SET
10839 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10842 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10843 that is needed at the target. */
10844 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10845 && ! flow_dependent_p (insn, dep_insn))
10848 else if (REG_NOTE_KIND (link) == 0)
10850 enum attr_type type;
10853 if (recog_memoized (insn) < 0
10854 || recog_memoized (dep_insn) < 0)
10857 dep_set = single_set (dep_insn);
10859 /* The latency that we specify in the scheduling description refers
10860 to the actual output, not to an auto-increment register; for that,
10861 the latency is one. */
10862 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10864 rtx set = single_set (insn);
10867 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10868 && (!MEM_P (SET_DEST (set))
10869 || !reg_mentioned_p (SET_DEST (dep_set),
10870 XEXP (SET_DEST (set), 0))))
10873 /* The only input for a call that is timing-critical is the
10874 function's address. */
10877 rtx call = get_call_rtx_from (insn);
10879 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10880 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10881 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10882 cost -= TARGET_SH4_300 ? 3 : 6;
10884 /* Likewise, the most timing critical input for an sfuncs call
10885 is the function address. However, sfuncs typically start
10886 using their arguments pretty quickly.
10887 Assume a four cycle delay for SH4 before they are needed.
10888 Cached ST40-300 calls are quicker, so assume only a one
10890 ??? Maybe we should encode the delays till input registers
10891 are needed by sfuncs into the sfunc call insn. */
10892 /* All sfunc calls are parallels with at least four components.
10893 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10894 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10895 && XVECLEN (PATTERN (insn), 0) >= 4
10896 && (reg = sfunc_uses_reg (insn)))
10898 if (! reg_set_p (reg, dep_insn))
10899 cost -= TARGET_SH4_300 ? 1 : 4;
10901 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10903 enum attr_type dep_type = get_attr_type (dep_insn);
10905 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10907 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10908 && (type = get_attr_type (insn)) != TYPE_CALL
10909 && type != TYPE_SFUNC)
10911 /* When the preceding instruction loads the shift amount of
10912 the following SHAD/SHLD, the latency of the load is increased
10914 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10915 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10916 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10917 XEXP (SET_SRC (single_set (insn)),
10920 /* When an LS group instruction with a latency of less than
10921 3 cycles is followed by a double-precision floating-point
10922 instruction, FIPR, or FTRV, the latency of the first
10923 instruction is increased to 3 cycles. */
10925 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10926 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10928 /* The lsw register of a double-precision computation is ready one
10930 else if (reload_completed
10931 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10932 && (use_pat = single_set (insn))
10933 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10934 SET_SRC (use_pat)))
10937 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10938 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10941 else if (TARGET_SH4_300)
10943 /* Stores need their input register two cycles later. */
10944 if (dep_set && cost >= 1
10945 && ((type = get_attr_type (insn)) == TYPE_STORE
10946 || type == TYPE_PSTORE
10947 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10949 rtx set = single_set (insn);
10951 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10952 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10955 /* But don't reduce the cost below 1 if the address depends
10956 on a side effect of dep_insn. */
10958 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10964 /* An anti-dependence penalty of two applies if the first insn is a double
10965 precision fadd / fsub / fmul. */
10966 else if (!TARGET_SH4_300
10967 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10968 && recog_memoized (dep_insn) >= 0
10969 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10970 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10971 /* A lot of alleged anti-flow dependences are fake,
10972 so check this one is real. */
10973 && flow_dependent_p (dep_insn, insn))
10979 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10980 if DEP_INSN is anti-flow dependent on INSN.
   The pattern of DEP_INSN is handed to note_stores with
   flow_dependent_p_1 as the callback and the address of a local that
   initially holds PATTERN (INSN).  The answer is true exactly when
   that local is NULL_RTX once note_stores returns. */
10982 flow_dependent_p (rtx insn, rtx dep_insn)
/* TMP is both the pattern the callback inspects and the result flag. */
10984 rtx tmp = PATTERN (insn);
10986 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10987 return tmp == NULL_RTX;
10990 /* A helper function for flow_dependent_p called through note_stores.
   X is the rtx reported by note_stores, PAT is ignored, and DATA is the
   rtx * supplied by flow_dependent_p. */
10992 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10994 rtx * pinsn = (rtx *) data;
/* Nothing is tested once *PINSN is already null.
   NOTE(review): the statement guarded by this test is not visible in
   this excerpt; since the caller compares against NULL_RTX it
   presumably clears *PINSN -- confirm. */
10996 if (*pinsn && reg_referenced_p (x, *pinsn))
11000 /* For use by sh_allocate_initial_value. Note that sh.md contains some
11001 'special function' patterns (type sfunc) that clobber pr, but that
11002 do not look like function calls to leaf_function_p. Hence we must
11003 do this extra check.
   The value returned is DF_REG_DEF_COUNT for the PR register, which is
   PR_MEDIA_REG when TARGET_SHMEDIA and PR_REG otherwise. */
11005 sh_pr_n_sets (void)
11007 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11010 /* Return where to allocate pseudo for a given hard register initial value. */
/* HARD_REG is the hard register whose initial value is being placed. */
11013 sh_allocate_initial_value (rtx hard_reg)
/* Only the PR register (PR_MEDIA_REG on SHmedia, PR_REG otherwise) is
   special-cased. */
11017 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
/* Visible conditions: sh_pr_n_sets reports no definitions of PR, and,
   when TARGET_SHCOMPACT, no call-cookie bit other than
   CALL_COOKIE_RET_TRAMP (1) is set and crtl->saves_all_registers is
   clear.  NOTE(review): the first operand of this condition and the
   statement it selects are not visible in this excerpt. */
11020 && ! sh_pr_n_sets ()
11021 && ! (TARGET_SHCOMPACT
11022 && ((crtl->args.info.call_cookie
11023 & ~ CALL_COOKIE_RET_TRAMP (1))
11024 || crtl->saves_all_registers)))
/* A Pmode frame memory reference addressed by return_address_pointer_rtx. */
11027 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11035 /* This function returns "2" to indicate dual issue for the SH4
11036 processor. To be used by the DFA pipeline description.
   The choice is keyed on TARGET_SUPERSCALAR.
   NOTE(review): the return statements are not visible in this excerpt. */
11038 sh_issue_rate (void)
11040 if (TARGET_SUPERSCALAR)
11046 /* Functions for ready queue reordering for sched1. */
11048 /* Get weight for mode for a set x.
   X is tested first as a CLOBBER and then as a SET; in both cases the
   destination must satisfy register_operand in MODE.
   NOTE(review): the values returned by each arm are not visible in this
   excerpt. */
11050 find_set_regmode_weight (rtx x, enum machine_mode mode)
11052 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11054 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
/* For a plain REG destination, distinguish whether the source also
   mentions that same register. */
11056 if (REG_P (SET_DEST (x)))
11058 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11068 /* Get regmode weight for insn.
   The weight is accumulated in REG_WEIGHT from find_set_regmode_weight
   applied to the insn's pattern (and to every element when the pattern
   is a PARALLEL), and is then adjusted while walking the insn's
   REG_DEAD and REG_UNUSED notes. */
11070 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
11072 short reg_weight = 0;
11075 /* Increment weight for each register born here. */
11076 x = PATTERN (insn);
11077 reg_weight += find_set_regmode_weight (x, mode);
11078 if (GET_CODE (x) == PARALLEL)
/* Visit the PARALLEL's elements from the last one down to element 0. */
11081 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11083 x = XVECEXP (PATTERN (insn), 0, j);
11084 reg_weight += find_set_regmode_weight (x, mode);
11087 /* Decrement weight for each register that dies here. */
11088 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11090 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11092 rtx note = XEXP (x, 0);
/* Only a REG whose mode is exactly MODE is considered. */
11093 if (REG_P (note) && GET_MODE (note) == mode)
11100 /* Calculate regmode weights for all insns of a basic block.
   The modes handled are SFmode and SImode.  The weight stored for an
   insn is its weight in MODE plus twice its weight in DFmode (for
   SFmode) or DImode (for SImode). */
11102 find_regmode_weight (basic_block b, enum machine_mode mode)
11104 rtx_insn *insn, *next_tail, *head, *tail;
11106 get_ebb_head_tail (b, b, &head, &tail);
11107 next_tail = NEXT_INSN (tail);
/* Walk from HEAD up to and including TAIL. */
11109 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11111 /* Handle register life information. */
11112 if (!INSN_P (insn))
11115 if (mode == SFmode)
11116 INSN_REGMODE_WEIGHT (insn, mode) =
11117 find_insn_regmode_weight (insn, mode)
11118 + 2 * find_insn_regmode_weight (insn, DFmode);
11119 else if (mode == SImode)
11120 INSN_REGMODE_WEIGHT (insn, mode) =
11121 find_insn_regmode_weight (insn, mode)
11122 + 2 * find_insn_regmode_weight (insn, DImode);
11126 /* Comparison function for ready queue sorting.
   X and Y each point to an rtx_insn * element.  Note the crossed
   naming: TMP is loaded from Y and TMP2 from X. */
11128 rank_for_reorder (const void *x, const void *y)
11130 rtx_insn *tmp = *(rtx_insn * const *) y;
11131 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11133 /* The insn in a schedule group should be issued the first. */
11134 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11135 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11137 /* If insns are equally good, sort by INSN_LUID (original insn order). This
11138 minimizes instruction movement, thus minimizing sched's effect on
11139 register pressure. */
11140 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11143 /* Resort the array A in which only element at index N may be out of order.
   Note that the element actually picked up below is A[N - 1]. */
11145 swap_reorder (rtx_insn **a, int n)
11147 rtx_insn *insn = a[n - 1];
/* Scan downwards for as long as rank_for_reorder (A + I, &INSN) is
   non-negative.  NOTE(review): the initialisation of I and the loop
   body are not visible in this excerpt. */
11150 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11158 /* Sort the ready list by ascending priority.
   READY holds NREADY insns; ordering is defined by rank_for_reorder. */
11160 ready_reorder (rtx_insn **ready, int nready)
/* NOTE(review): the test selecting swap_reorder is not visible in this
   excerpt; presumably it is the two-element case, since the qsort arm
   below requires NREADY > 2 -- confirm. */
11163 swap_reorder (ready, nready);
11164 else if (nready > 2)
11165 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11168 /* Count life regions of r0 for a block.
   The value returned is SET - DEATH.  NOTE(review): the declarations
   and updates of SET and DEATH, and the loop construct itself, are not
   visible in this excerpt. */
11170 find_r0_life_regions (basic_block b)
11172 rtx_insn *end, *insn;
/* Is R0 in the live-in set of B?  */
11179 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11190 insn = BB_HEAD (b);
11192 r0_reg = gen_rtx_REG (SImode, R0_REG);
/* This insn carries a REG_DEAD note for R0. */
11197 if (find_regno_note (insn, REG_DEAD, R0_REG))
/* This insn's single_set destination overlaps R0 and the insn has no
   REG_UNUSED note for R0. */
11203 && (pset = single_set (insn))
11204 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11205 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11213 insn = NEXT_INSN (insn);
11215 return set - death;
11218 /* Calculate regmode weights for all insns of all basic blocks.
   Allocates the two zero-filled regmode_weight arrays (OLD_MAX_UID
   shorts each), resets r0_life_regions, fills in the SImode and SFmode
   weights for every block of the current function, and finally clears
   the current pressure for both modes. */
11220 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11221 int verbose ATTRIBUTE_UNUSED,
11226 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11227 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11228 r0_life_regions = 0;
11230 FOR_EACH_BB_REVERSE_FN (b, cfun)
11232 find_regmode_weight (b, SImode);
11233 find_regmode_weight (b, SFmode);
/* The r0 life regions are only accumulated before reload has completed. */
11234 if (!reload_completed)
11235 r0_life_regions += find_r0_life_regions (b);
11238 CURR_REGMODE_PRESSURE (SImode) = 0;
11239 CURR_REGMODE_PRESSURE (SFmode) = 0;
/* Release both regmode_weight arrays, if allocated, and reset the
   pointers to NULL. */
11244 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11245 int verbose ATTRIBUTE_UNUSED)
11247 if (regmode_weight[0])
11249 free (regmode_weight[0]);
11250 regmode_weight[0] = NULL;
11252 if (regmode_weight[1])
11254 free (regmode_weight[1]);
11255 regmode_weight[1] = NULL;
11259 /* The scalar modes supported differ from the default version in TImode
11260 for 32-bit SHMEDIA.
   Every other case is delegated to default_scalar_mode_supported_p.
   NOTE(review): the value returned for the TImode case is not visible
   in this excerpt. */
11262 sh_scalar_mode_supported_p (enum machine_mode mode)
11264 if (TARGET_SHMEDIA32 && mode == TImode)
11267 return default_scalar_mode_supported_p (mode);
11270 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11271 keep count of register pressures on SImode and SFmode.
   Returns the cached value in all cases. */
11273 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11274 int sched_verbose ATTRIBUTE_UNUSED,
11276 int can_issue_more)
/* An insn whose pattern is neither a USE nor a CLOBBER takes up one
   issue slot; otherwise CAN_ISSUE_MORE is cached unchanged. */
11278 if (GET_CODE (PATTERN (insn)) != USE
11279 && GET_CODE (PATTERN (insn)) != CLOBBER)
11280 cached_can_issue_more = can_issue_more - 1;
11282 cached_can_issue_more = can_issue_more;
/* The pressure bookkeeping below is skipped once reload has completed. */
11284 if (reload_completed)
11285 return cached_can_issue_more;
11287 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11288 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11290 return cached_can_issue_more;
/* Reset the current SImode and SFmode register pressure to zero.  None
   of the three parameters is used. */
11294 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11295 int verbose ATTRIBUTE_UNUSED,
11296 int veclen ATTRIBUTE_UNUSED)
11298 CURR_REGMODE_PRESSURE (SImode) = 0;
11299 CURR_REGMODE_PRESSURE (SFmode) = 0;
11302 /* Some magic numbers. */
11303 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11304 functions that already have high pressure on r0.  high_pressure
   compares r0_life_regions against this limit with >=. */
11305 #define R0_MAX_LIFE_REGIONS 2
11306 /* Register Pressure thresholds for SImode and SFmode registers.
   high_pressure reports high pressure when the current pressure is
   strictly greater than the threshold. */
11307 #define SIMODE_MAX_WEIGHT 5
11308 #define SFMODE_MAX_WEIGHT 10
11310 /* Return true if the pressure is high for MODE.
   SFmode is compared against SFMODE_MAX_WEIGHT; any other mode falls
   through to the SImode comparison against SIMODE_MAX_WEIGHT. */
11312 high_pressure (enum machine_mode mode)
11314 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11315 functions that already have high pressure on r0. */
11316 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11319 if (mode == SFmode)
11320 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11322 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11325 /* Reorder ready queue if register pressure is high.
   Always returns sh_issue_rate ().  After reload has completed no
   reordering is attempted. */
11327 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11328 int sched_verbose ATTRIBUTE_UNUSED,
11331 int clock_var ATTRIBUTE_UNUSED)
11333 if (reload_completed)
11334 return sh_issue_rate ();
/* High pressure in either SFmode or SImode triggers the re-sort. */
11336 if (high_pressure (SFmode) || high_pressure (SImode))
11338 ready_reorder (ready, *n_readyp);
11341 return sh_issue_rate ();
11344 /* Skip cycles if the current register pressure is high.
   Returns cached_can_issue_more, the value recorded by
   sh_variable_issue.  READY, N_READYP and CLOCK_VAR are unused. */
11346 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11347 int sched_verbose ATTRIBUTE_UNUSED,
11348 rtx_insn **ready ATTRIBUTE_UNUSED,
11349 int *n_readyp ATTRIBUTE_UNUSED,
11350 int clock_var ATTRIBUTE_UNUSED)
11352 if (reload_completed)
11353 return cached_can_issue_more;
/* NOTE(review): the statement guarded by this pressure test is not
   visible in this excerpt. */
11355 if (high_pressure(SFmode) || high_pressure (SImode))
11358 return cached_can_issue_more;
11361 /* Skip cycles without sorting the ready queue. This will move insns from
11362 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11363 queue by sh_reorder. */
11365 /* Generally, skipping this many cycles is sufficient for all insns to move
   from Q -> R. */
11367 #define MAX_SKIPS 8
/* The decision is based on the number of cycles elapsed since
   LAST_CLOCK_VAR (CLOCK_VAR - LAST_CLOCK_VAR) compared against
   MAX_SKIPS; the reload_completed case is tested first.
   NOTE(review): the statements selected by each test are not visible in
   this excerpt. */
11370 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11371 int sched_verbose ATTRIBUTE_UNUSED,
11372 rtx insn ATTRIBUTE_UNUSED,
11373 int last_clock_var,
11377 if (reload_completed)
/* Fewer than MAX_SKIPS cycles have elapsed so far. */
11382 if ((clock_var - last_clock_var) < MAX_SKIPS)
11387 /* If this is the last cycle we are skipping, allow reordering of R. */
11388 if ((clock_var - last_clock_var) == MAX_SKIPS)
11400 /* SHmedia requires registers for branches, so we can't generate new
11401 branches past reload.
   True only for TARGET_SHMEDIA while reload is in progress or has
   completed. */
11403 sh_cannot_modify_jumps_p (void)
11405 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
/* Return TARGET_REGS on SHmedia and NO_REGS on every other target. */
11409 sh_target_reg_class (void)
11411 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
/* The answer depends on shmedia_space_reserved_for_target_registers,
   on whether we are past prologue/epilogue generation without
   TARGET_SAVE_ALL_TARGET_REGS, and on calc_live_regs reaching 6 * 8.
   NOTE(review): the value returned by each arm is not visible in this
   excerpt. */
11415 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11417 if (! shmedia_space_reserved_for_target_registers)
11419 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
/* DUMMY only receives the register set; just the returned value is
   examined here. */
11422 HARD_REG_SET dummy;
11423 if (calc_live_regs (&dummy) >= 6 * 8)
/* True for TARGET_SH5, for TARGET_HITACHI, or when sh_attr_renesas_p
   accepts RECORD_TYPE.  Note that RECORD_TYPE is marked
   ATTRIBUTE_UNUSED although it is passed to sh_attr_renesas_p. */
11429 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11431 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11435 /* On the SH1..SH4, the trampoline looks like
11436 2 0002 D202 mov.l l2,r2
11437 1 0000 D301 mov.l l1,r3
11438 3 0004 422B jmp @r2
11440 5 0008 00000000 l1: .long area
11441 6 000c 00000000 l2: .long function
11443 SH5 (compact) uses r1 instead of r3 for the static chain. */
11446 /* Emit RTL insns to initialize the variable parts of a trampoline.
11447 FNADDR is an RTX for the address of the function's pure code.
11448 CXT is an RTX for the static chain value for the function.
   TRAMP_MEM is the MEM for the trampoline; FNADDR is not a parameter
   but is derived below from DECL_RTL (FNDECL).  Four target variants
   are handled in turn: SHmedia64, other SHmedia, SHcompact, and the
   remaining targets. */
11450 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11452 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
/* TRAMP is the trampoline's address forced into a Pmode register. */
11453 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11455 if (TARGET_SHMEDIA64)
/* Opcode templates for the movi and shori words that get the pieces of
   FNADDR or'ed in below. */
11460 rtx movi1 = GEN_INT (0xcc000010);
11461 rtx shori1 = GEN_INT (0xc8000010);
11464 /* The following trampoline works within a +- 128 KB range for cxt:
11465 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11466 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11467 gettr tr1,r1; blink tr0,r63 */
11468 /* Address rounding makes it hard to compute the exact bounds of the
11469 offset for this trampoline, but we have a rather generous offset
11470 range, so frame_offset should do fine as an upper bound. */
11471 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11473 /* ??? could optimize this trampoline initialization
11474 by writing DImode words with two insns each. */
/* MASK (0x3fffc00) selects the 16-bit field in bits 10..25 of a word. */
11475 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
/* Word 0: (CXT - TRAMP) shifted into the masked field, or'ed with the
   ptb/u pattern. */
11476 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11477 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11478 insn = gen_rtx_AND (DImode, insn, mask);
11479 /* Or in ptb/u .,tr1 pattern */
11480 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11481 insn = force_operand (insn, NULL_RTX);
11482 insn = gen_lowpart (SImode, insn);
11483 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
/* Word at offset 4: movi template with FNADDR bits 48..63 (a right
   shift by 38 lands them in the masked field). */
11484 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11485 insn = gen_rtx_AND (DImode, insn, mask);
11486 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11487 insn = gen_lowpart (SImode, insn);
11488 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
/* Word at offset 8: shori template with FNADDR bits 32..47. */
11489 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11490 insn = gen_rtx_AND (DImode, insn, mask);
11491 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11492 insn = gen_lowpart (SImode, insn);
11493 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
/* Word at offset 12: shori template with FNADDR bits 16..31. */
11494 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11495 insn = gen_rtx_AND (DImode, insn, mask);
11496 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11497 insn = gen_lowpart (SImode, insn);
11498 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
/* Word at offset 16: shori template with FNADDR bits 0..15 (shifted
   left by 10 into the masked field). */
11499 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11500 insn = gen_rtx_AND (DImode, insn, mask);
11501 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11502 insn = gen_lowpart (SImode, insn);
11503 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
/* Three constant words at offsets 20, 24 and 28.  0x6bf10600 and
   0x4401fff0 are the values named ptabs and blink in the SHmedia
   branch further down; NOTE(review): 0x4415fc10 is presumably the
   gettr tr1,r1 listed in the comment above -- confirm. */
11504 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11505 GEN_INT (0x6bf10600));
11506 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11507 GEN_INT (0x4415fc10));
11508 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11509 GEN_INT (0x4401fff0));
11510 emit_insn (gen_ic_invalidate_line (tramp));
/* Template-based variant: the fixed part (FIXED_LEN bytes, i.e.
   TRAMPOLINE_SIZE less two Pmode words) is block-copied from the symbol
   __GCC_nested_trampoline and the trailing Pmode slots are then
   stored.  NOTE(review): the definition of DST and the value stored in
   the second slot are not visible in this excerpt. */
11513 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11514 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11516 tramp_templ = gen_datalabel_ref (tramp_templ);
11518 src = gen_const_mem (BLKmode, tramp_templ);
11519 set_mem_align (dst, 256);
11520 set_mem_align (src, 64);
11521 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11523 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11524 emit_move_insn (adjust_address (tramp_mem, Pmode,
11525 fixed_len + GET_MODE_SIZE (Pmode)),
11527 emit_insn (gen_ic_invalidate_line (tramp));
11530 else if (TARGET_SHMEDIA)
11532 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11533 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
/* The trampoline is assembled as three DImode words: QUAD0 (offset 0),
   QUAD1 (offset 8) and QUAD2 (offset 16). */
11534 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11535 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11536 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11537 rotated 10 right, and higher 16 bit of every 32 selected. */
11539 = force_reg (V2HImode, (simplify_gen_subreg
11540 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11541 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11542 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11544 fnaddr = force_reg (SImode, fnaddr);
11545 cxt = force_reg (SImode, cxt);
/* QUAD0 is built from FNADDR: shuffle, rotate, shift left by two, then
   store at offset 0. */
11546 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11547 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11549 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11550 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11551 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11552 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
/* CXTLOAD is built from CXT by the same three steps. */
11553 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11554 gen_rtx_SUBREG (V2HImode, cxt, 0),
11556 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11557 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11558 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
/* Combine PTABS, CXTLOAD and BLINK into QUAD1 and QUAD2; which insn
   builds which word depends on endianness. */
11559 if (TARGET_LITTLE_ENDIAN)
11561 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11562 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11566 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11567 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11569 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11570 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11571 emit_insn (gen_ic_invalidate_line (tramp));
11574 else if (TARGET_SHCOMPACT)
/* SHcompact leaves the whole job to the initialize_trampoline pattern. */
11576 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
/* Remaining targets: two SImode code words (halfword order depends on
   endianness) followed by CXT at offset 8 and FNADDR at offset 12. */
11579 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11580 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11582 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11583 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11585 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11586 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11587 if (TARGET_HARD_SH4 || TARGET_SH5)
/* The library routine __ic_invalidate is called when inline
   invalidation is disabled, or in user mode on anything other than
   SH4A / SH4-300; the other call below emits the inline pattern. */
11589 if (!TARGET_INLINE_IC_INVALIDATE
11590 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11591 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11592 FUNCTION_ORDINARY),
11593 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11595 emit_insn (gen_ic_invalidate_line (tramp));
11599 /* On SH5, trampolines are SHmedia code, so add 1 to the address.
   The addition is only emitted when TARGET_SHMEDIA, and its result goes
   into a fresh Pmode pseudo. */
11601 sh_trampoline_adjust_address (rtx tramp)
11603 if (TARGET_SHMEDIA)
11604 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11605 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11609 /* FIXME: This is overly conservative. A SHcompact function that
11610 receives arguments ``by reference'' will have them stored in its
11611 own stack frame, so it must not pass pointers or references to
11612 these arguments to other functions by means of sibling calls. */
11613 /* If PIC, we cannot make sibling calls to global functions
11614 because the PLT requires r12 to be live. */
/* DECL is the callee's declaration and may be null; EXP is unused. */
11616 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
/* Visible requirements: on SHcompact no stack_regs are in use, and the
   current function is not an interrupt handler. */
11619 && (! TARGET_SHCOMPACT
11620 || crtl->args.info.stack_regs == 0)
11621 && ! sh_cfun_interrupt_handler_p ()
/* Alternatives accepted here: a known DECL that is not public, or one
   whose visibility is not the default.  NOTE(review): the first
   alternative of this disjunction is not visible in this excerpt. */
11623 || (decl && ! TREE_PUBLIC (decl))
11624 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11627 /* Machine specific built-in functions. */
/* One entry of the built-in table. */
11629 struct builtin_description
/* Predicate saying whether this built-in is available. */
11631 bool (* const is_enabled) (void);
/* Insn code associated with the built-in. */
11632 const enum insn_code icode;
/* Name under which the built-in is registered. */
11633 const char *const name;
/* Availability predicate for the table entries: simply TARGET_SHMEDIA. */
11639 shmedia_builtin_p (void)
11641 return TARGET_SHMEDIA;
11644 /* This function can be used if there are any built-ins that are not for
11645 SHmedia. It's commented out to avoid the defined-but-unused warning.
11647 sh1_builtin_p (void) */
11653 /* describe number and signedness of arguments; arg[0] == result
11654 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
11655 /* 9: 64-bit pointer, 10: 32-bit pointer */
/* Each SH_BLTIN_* macro below names a row index of this table.  Indices
   below SH_BLTIN_NUM_SHARED_SIGNATURES (19) are the ones for which
   sh_init_builtins caches the function type it builds.  Several names
   deliberately alias the same index (19, 20 and 22). */
11656 static const char signature_args[][4] =
11658 #define SH_BLTIN_V2SI2 0
11660 #define SH_BLTIN_V4HI2 1
11662 #define SH_BLTIN_V2SI3 2
11664 #define SH_BLTIN_V4HI3 3
11666 #define SH_BLTIN_V8QI3 4
11668 #define SH_BLTIN_MAC_HISI 5
11670 #define SH_BLTIN_SH_HI 6
11672 #define SH_BLTIN_SH_SI 7
11674 #define SH_BLTIN_V4HI2V2SI 8
11676 #define SH_BLTIN_V4HI2V8QI 9
11678 #define SH_BLTIN_SISF 10
11680 #define SH_BLTIN_LDUA_L 11
11682 #define SH_BLTIN_LDUA_Q 12
11684 #define SH_BLTIN_STUA_L 13
11686 #define SH_BLTIN_STUA_Q 14
11688 #define SH_BLTIN_LDUA_L64 15
11690 #define SH_BLTIN_LDUA_Q64 16
11692 #define SH_BLTIN_STUA_L64 17
11694 #define SH_BLTIN_STUA_Q64 18
11696 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11697 #define SH_BLTIN_2 19
11698 #define SH_BLTIN_SU 19
11700 #define SH_BLTIN_3 20
11701 #define SH_BLTIN_SUS 20
11703 #define SH_BLTIN_PSSV 21
11705 #define SH_BLTIN_XXUU 22
11706 #define SH_BLTIN_UUUU 22
11708 #define SH_BLTIN_PV 23
11710 #define SH_BLTIN_VP 24
11713 /* mcmv: operands considered unsigned. */
11714 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11715 /* mperm: control value considered unsigned int. */
11716 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11717 /* mshards_q: returns signed short. */
11718 /* nsb: takes long long arg, returns unsigned char. */
/* Table of machine-specific built-ins.  Each initializer lists the
   availability predicate, the insn code, the built-in's name, its
   SH_BLTIN_* signature index and a final field initialised to 0.
   sh_init_builtins registers entry D with function code D - bdesc, so
   the order of the entries is significant.  The LDHI/LDLO/STHI/STLO
   names appear twice, once with the plain and once with the *64
   signatures. */
11719 static struct builtin_description bdesc[] =
11721 { shmedia_builtin_p,
11722 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11723 { shmedia_builtin_p,
11724 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11725 { shmedia_builtin_p,
11726 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11727 { shmedia_builtin_p,
11728 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11729 { shmedia_builtin_p,
11730 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11731 { shmedia_builtin_p,
11732 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11733 { shmedia_builtin_p,
11734 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11735 { shmedia_builtin_p,
11736 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11737 { shmedia_builtin_p,
11738 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11739 { shmedia_builtin_p,
11740 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11741 { shmedia_builtin_p,
11742 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11743 { shmedia_builtin_p,
11744 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11745 { shmedia_builtin_p,
11746 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11747 { shmedia_builtin_p,
11748 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11749 { shmedia_builtin_p,
11750 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11751 { shmedia_builtin_p,
11752 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11753 { shmedia_builtin_p,
11754 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11755 { shmedia_builtin_p,
11756 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11757 { shmedia_builtin_p,
11758 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11759 { shmedia_builtin_p,
11760 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11761 { shmedia_builtin_p,
11762 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11763 { shmedia_builtin_p,
11764 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11765 { shmedia_builtin_p,
11766 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11767 { shmedia_builtin_p,
11768 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11769 { shmedia_builtin_p,
11770 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11771 { shmedia_builtin_p,
11772 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11773 { shmedia_builtin_p,
11774 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11775 { shmedia_builtin_p,
11776 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11777 { shmedia_builtin_p,
11778 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11779 { shmedia_builtin_p,
11780 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11781 { shmedia_builtin_p,
11782 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11783 { shmedia_builtin_p,
11784 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11785 { shmedia_builtin_p,
11786 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11787 { shmedia_builtin_p,
11788 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11789 { shmedia_builtin_p,
11790 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11791 { shmedia_builtin_p,
11792 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11793 { shmedia_builtin_p,
11794 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11795 { shmedia_builtin_p,
11796 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11797 { shmedia_builtin_p,
11798 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11799 { shmedia_builtin_p,
11800 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11801 { shmedia_builtin_p,
11802 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11803 { shmedia_builtin_p,
11804 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11805 { shmedia_builtin_p,
11806 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11807 { shmedia_builtin_p,
11808 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11809 { shmedia_builtin_p,
11810 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11811 { shmedia_builtin_p,
11812 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11813 { shmedia_builtin_p,
11814 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11815 { shmedia_builtin_p,
11816 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11817 { shmedia_builtin_p,
11818 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11819 { shmedia_builtin_p,
11820 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11821 { shmedia_builtin_p,
11822 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11823 { shmedia_builtin_p,
11824 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11825 { shmedia_builtin_p,
11826 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11827 { shmedia_builtin_p,
11828 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11829 { shmedia_builtin_p,
11830 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11831 { shmedia_builtin_p,
11832 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11833 { shmedia_builtin_p,
11834 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11835 { shmedia_builtin_p,
11836 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11837 { shmedia_builtin_p,
11838 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11839 { shmedia_builtin_p,
11840 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11841 { shmedia_builtin_p,
11842 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11843 { shmedia_builtin_p,
11844 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11845 { shmedia_builtin_p,
11846 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11847 { shmedia_builtin_p,
11848 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11849 { shmedia_builtin_p,
11850 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11851 { shmedia_builtin_p,
11852 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11853 { shmedia_builtin_p,
11854 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11855 { shmedia_builtin_p,
11856 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11857 { shmedia_builtin_p,
11858 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11859 { shmedia_builtin_p,
11860 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11861 { shmedia_builtin_p,
11862 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11863 { shmedia_builtin_p,
11864 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11865 { shmedia_builtin_p,
11866 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11867 { shmedia_builtin_p,
11868 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11869 { shmedia_builtin_p,
11870 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11871 { shmedia_builtin_p,
11872 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11873 { shmedia_builtin_p,
11874 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11875 { shmedia_builtin_p,
11876 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11877 { shmedia_builtin_p,
11878 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11879 { shmedia_builtin_p,
11880 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11881 { shmedia_builtin_p,
11882 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11883 { shmedia_builtin_p,
11884 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11885 { shmedia_builtin_p,
11886 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
/* Walk bdesc and register every enabled built-in with
   add_builtin_function, building its function type from the matching
   signature_args row.  Types for signatures below
   SH_BLTIN_NUM_SHARED_SIGNATURES are cached in SHARED and reused. */
11890 sh_init_builtins (void)
11892 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11893 memset (shared, 0, sizeof shared);
11895 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11897 builtin_description* d = &bdesc[di];
11899 if (!d->is_enabled ())
11902 tree type, arg_type = NULL_TREE;
11903 int signature = d->signature;
/* Reuse a previously built type for a shared signature. */
11905 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11906 type = shared[signature];
11909 int has_result = signature_args[signature][0] != 0;
/* A pointer argument (bit 8) whose width marker does not suit the
   target: bit 1 together with TARGET_SHMEDIA32, or bit 2 together with
   TARGET_SHMEDIA64.  NOTE(review): the statement this selects is not
   visible in this excerpt; presumably the entry is skipped -- confirm. */
11912 if ((signature_args[signature][1] & 8)
11913 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11914 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
/* Without any FPU, entries whose operand 0 has a floating-point mode are
   singled out.  NOTE(review): the selected statement is not visible
   here either. */
11916 if (! TARGET_FPU_ANY
11917 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11919 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11920 args[i] = NULL_TREE;
/* Walk the signature row from its last column downwards.
   NOTE(review): the loop exit is not visible in this excerpt. */
11921 for (int i = 3; ; i--)
11923 int arg = signature_args[signature][i];
/* Operand numbers are offset by one when the insn produces a result. */
11924 int opno = i - 1 + has_result;
11927 arg_type = ptr_type_node;
/* Otherwise the type comes from the operand's mode; bit 1 of ARG is
   passed on as the unsignedness flag. */
11929 arg_type = (*lang_hooks.types.type_for_mode)
11930 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11934 arg_type = void_type_node;
11937 args[i-1] = arg_type;
/* ARG_TYPE, as left by the loop, is used as the return type. */
11939 type = build_function_type_list (arg_type, args[0], args[1],
11940 args[2], NULL_TREE);
11941 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11942 shared[signature] = type;
/* The function code is the entry's index within bdesc. */
11945 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11950 /* Implements target hook vector_mode_supported_p. */
/* Report whether vector mode MODE is supported.  Two groups of modes are
   tested: V2SFmode / V4SFmode / V16SFmode, guarded by a condition whose
   first operand is not visible in this listing, and V8QImode / V2HImode /
   V4HImode / V2SImode, which additionally require TARGET_SHMEDIA.  */
11952 sh_vector_mode_supported_p (enum machine_mode mode)
11955 && ((mode == V2SFmode)
11956 || (mode == V4SFmode)
11957 || (mode == V16SFmode)))
11960 else if (TARGET_SHMEDIA
11961 && ((mode == V8QImode)
11962 || (mode == V2HImode)
11963 || (mode == V4HImode)
11964 || (mode == V2SImode)))
/* Decide whether a frame pointer is required.  Only the
   SUBTARGET_FRAME_POINTER_REQUIRED test is visible in this listing; any
   further checks and the return statements are not shown.  */
11971 sh_frame_pointer_required (void)
11973 /* If needed override this in other tm.h files to cope with various OS
11974 lossage requiring a frame pointer. */
11975 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11984 /* Implements target hook dwarf_calling_convention. Return an enum
11985 of dwarf_calling_convention. */
11987 sh_dwarf_calling_convention (const_tree func)
/* FUNC gets DW_CC_GNU_renesas_sh when sh_attr_renesas_p holds for it;
   every other function is reported as DW_CC_normal.  */
11989 if (sh_attr_renesas_p (func))
11990 return DW_CC_GNU_renesas_sh;
11992 return DW_CC_normal;
11995 /* Returns the sh builtin decl for CODE. */
11997 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
/* CODE is an index into bdesc.  Out-of-range codes yield
   error_mark_node.  */
11999 if (code >= ARRAY_SIZE (bdesc))
12000 return error_mark_node;
/* So do builtins whose is_enabled hook reports false.  */
12002 if (!bdesc[code].is_enabled ())
12003 return error_mark_node;
/* Otherwise hand back the decl stored in the table entry.  */
12005 return bdesc[code].fndecl;
12008 /* Expand an expression EXP that calls a built-in function,
12009 with result going to TARGET if that's convenient
12010 (and in mode MODE if that's convenient).
12011 SUBTARGET may be used as the target for computing one of EXP's operands.
12012 IGNORE is nonzero if the value is to be ignored. */
12014 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12015 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
/* Recover the bdesc entry from the function code of the called decl.
   NOTE(review): fcode indexes bdesc with no range check here, unlike
   sh_builtin_decl -- confirm that callers guarantee a valid code.  */
12017 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12018 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12019 const struct builtin_description *d = &bdesc[fcode];
12020 enum insn_code icode = d->icode;
12021 int signature = d->signature;
/* A non-zero slot 0 means the builtin produces a value.  TARGET is kept
   only if it exists, has the mode of insn operand 0 and satisfies that
   operand's predicate; otherwise a fresh pseudo is allocated.  The result
   becomes the first collected operand.  */
12025 if (signature_args[signature][0])
12030 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12031 if (! target || GET_MODE (target) != tmode
12032 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12033 target = gen_reg_rtx (tmode);
12034 op[nop++] = target;
/* Expand up to three call arguments, one per signature slot 1..3.  */
12039 for (int i = 1; i <= 3; i++, nop++)
12042 enum machine_mode opmode, argmode;
12045 if (! signature_args[signature][i])
12047 arg = CALL_EXPR_ARG (exp, i - 1);
12048 if (arg == error_mark_node)
/* Signature codes with bit 8 set use ptr_type_node as the operand type;
   the others take mode and type from the insn operand.  */
12050 if (signature_args[signature][i] & 8)
12053 optype = ptr_type_node;
12057 opmode = insn_data[icode].operand[nop].mode;
12058 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
/* Convert the argument with a NOP_EXPR when its mode differs from the
   operand mode, expand it, and force it into a register of the operand
   mode if the operand predicate rejects the expanded rtx.  */
12060 argmode = TYPE_MODE (TREE_TYPE (arg));
12061 if (argmode != opmode)
12062 arg = build1 (NOP_EXPR, optype, arg);
12063 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12064 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12065 op[nop] = copy_to_mode_reg (opmode, op[nop]);
/* Call the insn generator with one to four of the collected operands.  The
   dispatch selecting among these calls is not visible in this listing.  */
12068 rtx pat = NULL_RTX;
12073 pat = (*insn_data[d->icode].genfun) (op[0]);
12076 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12079 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12082 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12085 gcc_unreachable ();
/* Emit the unary operation CODE on the two-element SFmode vector OP1 with
   result OP0, as two gen_unary_sf_op insns: one using selector const0_rtx
   and one using selector const1_rtx.  */
12094 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12096 rtx sel0 = const0_rtx;
12097 rtx sel1 = const1_rtx;
12098 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
/* The SFmode operation rtx is built once and shared by both insns.  */
12099 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12101 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12102 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
/* Emit the binary operation CODE on OP1 and OP2 with result OP0 as two
   insns, gen_binary_sf_op0 and gen_binary_sf_op1, both sharing one SFmode
   operation rtx.  */
12106 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12108 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12110 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12111 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12114 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12115 We can allow any mode in any general register. The special registers
12116 only allow SImode. Don't allow any mode in the PR.
12118 We cannot hold DCmode values in the XD registers because alter_reg
12119 handles subregs of them incorrectly. We could work around this by
12120 spacing the XD registers like the DR registers, but this would require
12121 additional memory in every compilation to hold larger register vectors.
12122 We could hold SFmode / SCmode values in XD registers, but that
12123 would require a tertiary reload when reloading from / to memory,
12124 and a secondary reload to reload from / to general regs; that
12125 seems to be a losing proposition.
12127 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12128 it won't be ferried through GP registers first. */
12130 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
/* Special registers hold SImode only.  */
12132 if (SPECIAL_REGISTER_P (regno))
12133 return mode == SImode;
/* FPUL accepts SImode and SFmode.  */
12135 if (regno == FPUL_REG)
12136 return (mode == SImode || mode == SFmode);
12138 if (FP_REGISTER_P (regno) && mode == SFmode)
/* V2SFmode: tested against FP registers at an even offset from
   FIRST_FP_REG and against general registers.  */
12141 if (mode == V2SFmode)
12143 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12144 || GENERAL_REGISTER_P (regno)))
/* V4SFmode: same test with FP registers at a multiple-of-4 offset.  */
12150 if (mode == V4SFmode)
12152 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12153 || GENERAL_REGISTER_P (regno))
/* V16SFmode: on SHmedia the FP register offset must be a multiple of 16;
   the other visible result is that only FIRST_XD_REG qualifies.  */
12159 if (mode == V16SFmode)
12161 if (TARGET_SHMEDIA)
12163 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12169 return regno == FIRST_XD_REG;
/* Remaining modes in FP registers.  The visible part of the condition
   requires an even register offset for the double-width modes and a
   multiple-of-4 offset for TImode on SH4 / SHmedia.  */
12172 if (FP_REGISTER_P (regno))
12176 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12177 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12180 && (mode == DFmode || mode == DImode
12181 || mode == V2SFmode || mode == TImode)))
12182 && ((regno - FIRST_FP_REG) & 1) == 0)
12183 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12184 && ((regno - FIRST_FP_REG) & 3) == 0))
/* XD registers hold DFmode only.  */
12190 if (XD_REGISTER_P (regno))
12191 return mode == DFmode;
12193 if (TARGET_REGISTER_P (regno))
12194 return (mode == DImode || mode == SImode || mode == PDImode);
12196 if (regno == PR_REG)
12197 return mode == SImode;
12199 if (regno == FPSCR_REG)
12200 return mode == PSImode;
12202 /* FIXME. This works around PR target/37633 for -O0. */
12203 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
/* N is the mode size in 8-byte units.  It widens the window of general
   registers ending at FIRST_GENERAL_REG + 14 that is singled out below;
   the result returned for that window is not visible here.  */
12205 unsigned int n = GET_MODE_SIZE (mode) / 8;
12207 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12208 && regno <= FIRST_GENERAL_REG + 14)
12215 /* Return the class of registers for which a mode change from FROM to TO is invalid. */
12218 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
12219 enum reg_class rclass)
12221 /* We want to enable the use of SUBREGs as a means to
12222 VEC_SELECT a single element of a vector. */
12224 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12225 This can be problematic when SFmode vector subregs need to be accessed
12226 on the stack with displacement addressing, as it happens with -O0.
12227 Thus we disallow the mode change for -O0. */
/* Taking an SFmode piece of an SFmode-element vector: when optimizing the
   answer is whether RCLASS intersects GENERAL_REGS, otherwise false.  */
12228 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12229 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
/* Size-changing mode changes: the answer is whether RCLASS intersects
   DF_REGS.  On little-endian targets the test applies when either mode is
   narrower than 8 bytes; the other visible test looks only at FROM.  */
12231 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12233 if (TARGET_LITTLE_ENDIAN)
12235 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12236 return reg_classes_intersect_p (DF_REGS, rclass);
12240 if (GET_MODE_SIZE (from) < 8)
12241 return reg_classes_intersect_p (DF_REGS, rclass);
12247 /* Return true if registers in machine mode MODE will likely be
12248 allocated to registers in small register classes. */
12250 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
/* MODE is ignored: the answer is true on every target except SHmedia.  */
12252 return (! TARGET_SHMEDIA);
12255 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12256 that label is used. */
12258 sh_mark_label (rtx address, int nuses)
/* A GOTOFF address wraps the label: strip the outer rtx, an optional PLUS,
   and then take element 0 of the remaining rtx's vector operand.  */
12260 if (GOTOFF_P (address))
12262 /* Extract the label or symbol. */
12263 address = XEXP (address, 0);
12264 if (GET_CODE (address) == PLUS)
12265 address = XEXP (address, 0);
12266 address = XVECEXP (address, 0, 0);
/* Only a LABEL_REF whose operand really is a label has its use count
   bumped; anything else is left untouched.  */
12268 if (GET_CODE (address) == LABEL_REF
12269 && LABEL_P (XEXP (address, 0)))
12270 LABEL_NUSES (XEXP (address, 0)) += nuses;
12273 /* Compute extra cost of moving data between one register class and another.
12276 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12277 uses this information. Hence, the general register <-> floating point
12278 register information here is not used for SFmode. */
/* MODE is the mode of the value moved, SRCCLASS / DSTCLASS the source and
   destination register classes.  Several of the costs returned by the
   tests below are not visible in this listing.  */
12280 sh_register_move_cost (enum machine_mode mode,
12281 reg_class_t srcclass, reg_class_t dstclass)
12283 if (dstclass == T_REGS || dstclass == PR_REGS)
12286 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12289 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12290 && REGCLASS_HAS_FP_REG (srcclass)
12291 && REGCLASS_HAS_FP_REG (dstclass))
/* T register to an FP class: 10 on hard SH4 when not optimizing for size,
   otherwise 7.  */
12294 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12295 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12297 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12298 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
/* General <-> FP moves: a per-target base cost (4 / 8 / 12) plus the Pmode
   penalty, multiplied by the number of 8-byte units in MODE.  */
12301 if ((REGCLASS_HAS_FP_REG (dstclass)
12302 && REGCLASS_HAS_GENERAL_REG (srcclass))
12303 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12304 && REGCLASS_HAS_FP_REG (srcclass)))
12306 /* Discourage trying to use fp regs for a pointer. This also
12307 discourages fp regs with SImode because Pmode is an alias
12308 of SImode on this target. See PR target/48596. */
12309 int addend = (mode == Pmode) ? 40 : 0;
12311 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12312 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12315 if ((dstclass == FPUL_REGS
12316 && REGCLASS_HAS_GENERAL_REG (srcclass))
12317 || (srcclass == FPUL_REGS
12318 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12321 if ((dstclass == FPUL_REGS
12322 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12323 || (srcclass == FPUL_REGS
12324 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12327 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12328 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12331 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12333 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
/* A non-negative sh_gettrcost overrides the cost of reading a target
   register.  */
12335 if (sh_gettrcost >= 0)
12336 return sh_gettrcost;
12337 else if (!TARGET_PT_FIXED)
12341 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12342 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12347 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12348 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
/* Remaining cases cost 2 per 8-byte unit when the preceding condition
   holds, and 2 per 4-byte unit otherwise.  */
12349 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12351 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
/* Emit a move that loads a pointer from the constant memory at ADDR into
   REG and return the result of emit_move_insn.  The memory is read in
   ptr_mode and sign-extended to Pmode when the two modes differ.  */
12355 emit_load_ptr (rtx reg, rtx addr)
12357 rtx mem = gen_const_mem (ptr_mode, addr);
12359 if (Pmode != ptr_mode)
12360 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12361 return emit_move_insn (reg, mem);
/* Emit to FILE the code of a thunk that adjusts the "this" pointer and
   tail-calls a function (referred to as `function' in the body; that
   parameter's declaration line is not visible in this listing).  The
   adjustment adds DELTA and, in the vcall part, a value loaded through
   the adjusted pointer at VCALL_OFFSET.  The insns are generated as RTL
   and then run through final.  */
12365 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12366 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12369 CUMULATIVE_ARGS cum;
12370 int structure_value_byref = 0;
12371 rtx this_rtx, this_value, sibcall, funexp;
12373 tree funtype = TREE_TYPE (function);
/* Whether DELTA fits the immediate range of an add instruction.  */
12374 int simple_add = CONST_OK_FOR_ADD (delta);
12376 rtx scratch0, scratch1, scratch2;
/* Set the post-reload / post-epilogue flags while the thunk is generated;
   the first two are cleared again at the end of this function.  */
12379 reload_completed = 1;
12380 epilogue_completed = 1;
12381 crtl->uses_only_leaf_regs = 1;
12383 emit_note (NOTE_INSN_PROLOGUE_END);
12385 /* Find the "this" pointer. We have such a wide range of ABIs for the
12386 SH that it's best to do this completely machine independently.
12387 "this" is passed as first argument, unless a structure return pointer
12388 comes first, in which case "this" comes second. */
12389 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12390 #ifndef PCC_STATIC_STRUCT_RETURN
12391 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12392 structure_value_byref = 1;
12393 #endif /* not PCC_STATIC_STRUCT_RETURN */
/* When the struct return address travels as an ordinary argument (no
   dedicated struct-value rtx), step over it before locating "this".  */
12394 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12396 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12398 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12401 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12403 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12404 static chain pointer (even if you can't have nested virtual functions
12405 right now, someone might implement them sometime), and the rest of the
12406 registers are used for argument passing, are callee-saved, or reserved. */
12407 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
12408 -ffixed-reg has been used. */
12409 if (! call_used_regs[0] || fixed_regs[0])
12410 error ("r0 needs to be available as a call-clobbered register");
/* All three scratch registers start out as r0; the code below replaces
   scratch1 / scratch2 with other registers where they are usable.  */
12411 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12414 if (call_used_regs[1] && ! fixed_regs[1])
12415 scratch1 = gen_rtx_REG (ptr_mode, 1);
12416 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12417 pointing where to return struct values. */
12418 if (call_used_regs[3] && ! fixed_regs[3])
12419 scratch2 = gen_rtx_REG (Pmode, 3);
/* SHmedia: search the general registers for a second call-clobbered,
   non-fixed, non-argument register, and the target registers for
   scratch2; failing either search is reported as an error.  */
12421 else if (TARGET_SHMEDIA)
12423 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12424 if (i != REGNO (scratch0) &&
12425 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12427 scratch1 = gen_rtx_REG (ptr_mode, i);
12430 if (scratch1 == scratch0)
12431 error ("need a second call-clobbered general purpose register");
12432 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12433 if (call_used_regs[i] && ! fixed_regs[i])
12435 scratch2 = gen_rtx_REG (Pmode, i);
12438 if (scratch2 == scratch0)
12439 error ("need a call-clobbered target register");
/* Apply DELTA.  Where the conditions below allow it, this + DELTA is used
   directly as the address of the first load; otherwise DELTA is added to
   this_rtx, through scratch1 when it is not a simple add immediate.  */
12442 this_value = plus_constant (Pmode, this_rtx, delta);
12444 && (simple_add || scratch0 != scratch1)
12445 && strict_memory_address_p (ptr_mode, this_value))
12447 emit_load_ptr (scratch0, this_value);
12452 ; /* Do nothing. */
12453 else if (simple_add)
12454 emit_move_insn (this_rtx, this_value);
12457 emit_move_insn (scratch1, GEN_INT (delta));
12458 emit_insn (gen_add2_insn (this_rtx, scratch1));
/* Vcall part: load a pointer through this_rtx, form the address at
   VCALL_OFFSET from it, load the adjustment stored there and add it to
   this_rtx.  */
12466 emit_load_ptr (scratch0, this_rtx);
12468 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12469 if (strict_memory_address_p (ptr_mode, offset_addr))
12470 ; /* Do nothing. */
12471 else if (! TARGET_SH5 && scratch0 != scratch1)
12473 /* scratch0 != scratch1, and we have indexed loads. Get better
12474 schedule by loading the offset into r1 and using an indexed
12475 load - then the load of r1 can issue before the load from
12476 (this_rtx + delta) finishes. */
12477 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12478 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12480 else if (CONST_OK_FOR_ADD (vcall_offset))
12482 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12483 offset_addr = scratch0;
12485 else if (scratch0 != scratch1)
12487 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12488 emit_insn (gen_add2_insn (scratch0, scratch1));
12489 offset_addr = scratch0;
12492 gcc_unreachable (); /* FIXME */
12493 emit_load_ptr (scratch0, offset_addr);
12495 if (Pmode != ptr_mode)
12496 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12497 emit_insn (gen_add2_insn (this_rtx, scratch0));
12500 /* Generate a tail call to the target function. */
12501 if (! TREE_USED (function))
12503 assemble_external (function);
12504 TREE_USED (function) = 1;
12506 funexp = XEXP (DECL_RTL (function), 0);
12507 /* If the function is overridden, so is the thunk, hence we don't
12508 need GOT addressing even if this is a public symbol. */
12510 if (TARGET_SH1 && ! flag_weak)
12511 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
/* PIC on SH2 and later: use the pc-relative sibcall pattern and substitute
   scratch2 into element 2 of the generated pattern.  */
12514 if (TARGET_SH2 && flag_pic)
12516 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12517 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12521 if (TARGET_SHMEDIA && flag_pic)
12523 funexp = gen_sym2PIC (funexp);
12524 PUT_MODE (funexp, Pmode);
/* Otherwise load the callee address into scratch2 and call through it.  */
12526 emit_move_insn (scratch2, funexp);
12527 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12528 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
/* Emit the call, mark it as a sibling call, and record that it uses
   this_rtx.  */
12530 sibcall = emit_call_insn (sibcall);
12531 SIBLING_CALL_P (sibcall) = 1;
12532 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12535 /* Run just enough of rest_of_compilation to do scheduling and get
12536 the insns emitted. Note that use_thunk calls
12537 assemble_start_function and assemble_end_function. */
12539 insns = get_insns ();
12545 split_all_insns_noflow ();
12549 shorten_branches (insns);
12550 final_start_function (insns, file, 1);
12551 final (insns, file, 1);
12552 final_end_function ();
/* Undo the global state set at the top of the function.  */
12554 reload_completed = 0;
12555 epilogue_completed = 0;
/* Build a SYMBOL_REF in Pmode for the function called NAME, flagged
   SYMBOL_FLAG_FUNCTION.  KIND selects how the address is materialised: two
   of the visible paths load it into a register (TARGET if given, otherwise
   a new pseudo) via symGOT2reg or symGOTOFF2reg.  If TARGET is given and
   differs from the resulting rtx, the value is finally moved into TARGET.
   The case labels and the return statement are only partly visible in this
   listing.  */
12559 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12563 /* If this is not an ordinary function, the name usually comes from a
12564 string literal or an sprintf buffer. Make sure we use the same
12565 string consistently, so that cse will be able to unify address loads. */
12566 if (kind != FUNCTION_ORDINARY)
12567 name = IDENTIFIER_POINTER (get_identifier (name));
12568 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12569 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12573 case FUNCTION_ORDINARY:
12577 rtx reg = target ? target : gen_reg_rtx (Pmode);
12579 emit_insn (gen_symGOT2reg (reg, sym));
12585 /* ??? To allow cse to work, we use GOTOFF relocations.
12586 We could add combiner patterns to transform this into
12587 straight pc-relative calls with sym2PIC / bsrf when
12588 label load and function call are still 1:1 and in the
12589 same basic block during combine. */
12590 rtx reg = target ? target : gen_reg_rtx (Pmode);
12592 emit_insn (gen_symGOTOFF2reg (reg, sym));
12597 if (target && sym != target)
12599 emit_move_insn (target, sym);
12605 /* Find the number of a general purpose register in S. */
12607 scavenge_reg (HARD_REG_SET *s)
/* Scan the general registers in increasing order for one whose bit is set
   in *S.  The return statements are not visible in this listing.  */
12610 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12611 if (TEST_HARD_REG_BIT (*s, r))
/* Return an rtx for the value the procedure-return register had on entry
   to the current function.  On SHcompact, when the prologue may clobber
   PR, this is a frame memory reference; otherwise it is based on
   get_hard_reg_initial_val for PR_REG (PR_MEDIA_REG on SHmedia), wrapped
   in an UNSPEC_RA on the visible return path.  */
12617 sh_get_pr_initial_val (void)
12621 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12622 PR register on SHcompact, because it might be clobbered by the prologue.
12623 We check first if that is known to be the case. */
12624 if (TARGET_SHCOMPACT
12625 && ((crtl->args.info.call_cookie
12626 & ~ CALL_COOKIE_RET_TRAMP (1))
12627 || crtl->saves_all_registers))
12628 return gen_frame_mem (SImode, return_address_pointer_rtx);
12630 /* If we haven't finished rtl generation, there might be a nonlocal label
12631 that we haven't seen yet.
12632 ??? get_hard_reg_initial_val fails if it is called after register
12633 allocation has started, unless it has been called before for the
12634 same register. And even then, we end in trouble if we didn't use
12635 the register in the same basic block before. So call
12636 get_hard_reg_initial_val now and wrap it in an unspec if we might
12637 need to replace it. */
12638 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12639 combine can put the pseudo returned by get_hard_reg_initial_val into
12640 instructions that need a general purpose register, which will fail to
12641 be recognized when the pseudo becomes allocated to PR. */
12643 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12645 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
/* Expand a store-flag operation that compares the T register with a
   constant.  OPERANDS[0] is the destination, OPERANDS[1] the comparison
   rtx (only its code is used), OPERANDS[2] and OPERANDS[3] the compared
   values.  The return statements are not visible in this listing.  */
12650 sh_expand_t_scc (rtx operands[])
12652 enum rtx_code code = GET_CODE (operands[1]);
12653 rtx target = operands[0];
12654 rtx op0 = operands[2];
12655 rtx op1 = operands[3];
12656 rtx result = target;
/* Only "T reg compared with a CONST_INT" is handled here.  */
12659 if (!REG_P (op0) || REGNO (op0) != T_REG
12660 || !CONST_INT_P (op1))
/* Compute into a fresh SImode pseudo when TARGET is not a register.  */
12662 if (!REG_P (result))
12663 result = gen_reg_rtx (SImode);
12664 val = INTVAL (op1);
/* T == 1 and T != 0 copy T (movt); T == 0 and T != 1 copy its negation
   (movnegt).  Against any other constant an EQ / NE comparison has a known
   outcome, so the constant (code == NE) is moved instead.  */
12665 if ((code == EQ && val == 1) || (code == NE && val == 0))
12666 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12667 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12668 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12669 else if (code == EQ || code == NE)
12670 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
/* Copy back when a temporary was used.  */
12673 if (result != target)
12674 emit_move_insn (target, result);
12678 /* INSN is an sfunc; return the rtx that describes the address used. */
12680 extract_sfunc_addr (rtx insn)
12682 rtx pattern, part = NULL_RTX;
/* Look through the elements of the insn pattern for a USE of a Pmode
   operand that resolves (via true_regnum) to a general register; its
   operand is the address.  */
12685 pattern = PATTERN (insn);
12686 len = XVECLEN (pattern, 0);
12687 for (i = 0; i < len; i++)
12689 part = XVECEXP (pattern, 0, i);
12690 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12691 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12692 return XEXP (part, 0);
/* No such USE: the first pattern element must be an UNSPEC_VOLATILE, and
   the address is element 1 of its operand vector.  */
12694 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12695 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12698 /* Verify that the register in use_sfunc_addr still agrees with the address
12699 used in the sfunc. This prevents fill_slots_from_thread from changing use_sfunc_addr.
12701 INSN is the use_sfunc_addr instruction, and REG is the register it guards. */
12704 check_use_sfunc_addr (rtx insn, rtx reg)
12706 /* Search for the sfunc. It should really come right after INSN. */
12707 while ((insn = NEXT_INSN (insn)))
/* Labels, jumps and non-insns are tested for explicitly; what happens for
   each is not visible in this listing.  */
12709 if (LABEL_P (insn) || JUMP_P (insn))
12711 if (! INSN_P (insn))
/* Look at the first element when the insn is a SEQUENCE.  */
12714 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
12715 insn = XVECEXP (PATTERN (insn), 0, 0);
12716 if (GET_CODE (PATTERN (insn)) != PARALLEL
12717 || get_attr_type (insn) != TYPE_SFUNC)
/* The result is whether the address used by this sfunc equals REG.  */
12719 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12721 gcc_unreachable ();
12724 /* This function returns a constant rtx that represents 2**15 / pi in
12725 SFmode. Multiplying an SFmode angle in radians by it yields a fixed-point
12726 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
/* Cache for the constant built by sh_fsca_sf2int; filled on first use.  */
12727 static GTY(()) rtx sh_fsca_sf2int_rtx;
12730 sh_fsca_sf2int (void)
/* Build the SFmode CONST_DOUBLE from its decimal string on the first call
   only; later calls return the cached rtx.  */
12732 if (! sh_fsca_sf2int_rtx)
12734 REAL_VALUE_TYPE rv;
12736 real_from_string (&rv, "10430.378350470453");
12737 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12740 return sh_fsca_sf2int_rtx;
12743 /* This function returns a constant rtx that represents pi / 2**15 in
12744 SFmode. Multiplying a fixed-point signed 16.16-bit fraction of a full
12745 circle by it yields the angle in radians as an SFmode value, i.e.
12746 0x10000 maps to 2*pi. */
/* Cache for the constant built by sh_fsca_int2sf; filled on first use.  */
12747 static GTY(()) rtx sh_fsca_int2sf_rtx;
12750 sh_fsca_int2sf (void)
/* Build the SFmode CONST_DOUBLE from its decimal string on the first call
   only; later calls return the cached rtx.  */
12752 if (! sh_fsca_int2sf_rtx)
12754 REAL_VALUE_TYPE rv;
12756 real_from_string (&rv, "9.587379924285257e-5");
12757 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12760 return sh_fsca_int2sf_rtx;
12763 /* Initialize the CUMULATIVE_ARGS structure. */
/* PCUM is the structure to initialise, N_NAMED_ARGS the number of named
   arguments (-1 is treated specially, see `outgoing'), and MODE the
   return mode used on the path that does not consult the function type.
   The body also refers to `fntype' and `fndecl', whose parameter lines are
   not visible in this listing.  */
12765 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12767 rtx libname ATTRIBUTE_UNUSED,
12769 signed int n_named_args,
12770 enum machine_mode mode)
/* Reset the floating-point argument count and the bookkeeping fields.  */
12772 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12773 pcum->free_single_fp_reg = 0;
12774 pcum->stack_regs = 0;
12775 pcum->byref_regs = 0;
/* OUTGOING is 0 exactly when N_NAMED_ARGS is -1, and 1 otherwise.  */
12777 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12779 /* XXX - Should we check TARGET_HITACHI here ??? */
12780 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
/* Path using the function type: an aggregate return value forces memory
   under the Hitachi / Renesas ABI, and on SH5 it takes the first integer
   argument slot.  */
12784 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12785 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12786 pcum->prototype_p = prototype_p (fntype);
12787 pcum->arg_count [(int) SH_ARG_INT]
12788 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
/* The return-trampoline cookie is set on SHcompact when no integer slot is
   taken, the return value is larger than 4 bytes, and it is returned in
   FIRST_RET_REG.  */
12791 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12792 && pcum->arg_count [(int) SH_ARG_INT] == 0
12793 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12794 ? int_size_in_bytes (TREE_TYPE (fntype))
12795 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12796 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12797 == FIRST_RET_REG));
/* Path without a function type: no prototype, no integer slot taken; the
   cookie and force_mem are derived from MODE alone.  */
12801 pcum->arg_count [(int) SH_ARG_INT] = 0;
12802 pcum->prototype_p = FALSE;
12803 if (mode != VOIDmode)
12805 pcum->call_cookie =
12806 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12807 && GET_MODE_SIZE (mode) > 4
12808 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12810 /* If the default ABI is the Renesas ABI then all library
12811 calls must assume that the library will be using the
12812 Renesas ABI. So if the function would return its result
12813 in memory then we must force the address of this memory
12814 block onto the stack. Ideally we would like to call
12815 targetm.calls.return_in_memory() here but we do not have
12816 the TYPE or the FNDECL available so we synthesize the
12817 contents of that function as best we can. */
12819 (TARGET_DEFAULT & MASK_HITACHI)
12820 && (mode == BLKmode
12821 || (GET_MODE_SIZE (mode) > 4
12822 && !(mode == DFmode
12823 && TARGET_FPU_DOUBLE)));
/* Fallback: no cookie and no forced memory return.  */
12827 pcum->call_cookie = 0;
12828 pcum->force_mem = FALSE;
12833 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12834 not enter into CONST_DOUBLE for the replace.
12836 Note that copying is not done so X must not be shared unless all copies
12837 are to be modified.
12839 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12840 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
12841 replacements[n*2+1] - and that we take mode changes into account.
12843 If a replacement is ambiguous, return NULL_RTX.
12845 If MODIFY is zero, don't modify any rtl in place,
12846 just return zero or nonzero for failure / success. */
12848 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12853 /* The following prevents loops occurrence when we change MEM in
12854 CONST_DOUBLE onto the same CONST_DOUBLE. */
12855 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
/* Exact pointer match with a FROM whose TO has the same mode as X: the TO
   is returned as is.  */
12858 for (i = n_replacements - 1; i >= 0 ; i--)
12859 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12860 return replacements[i*2+1];
12862 /* Allow this function to make replacements in EXPR_LISTs. */
/* SUBREG: replace inside the inner register first.  A CONST_INT result is
   folded with simplify_subreg; on the other visible path the new rtx is
   stored back as SUBREG_REG.  */
12866 if (GET_CODE (x) == SUBREG)
12868 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12869 n_replacements, modify);
12871 if (CONST_INT_P (new_rtx))
12873 x = simplify_subreg (GET_MODE (x), new_rtx,
12874 GET_MODE (SUBREG_REG (x)),
12880 SUBREG_REG (x) = new_rtx;
/* REG: compare the range of hard registers X occupies with each FROM.  */
12884 else if (REG_P (x))
12886 unsigned regno = REGNO (x);
12887 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12888 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12889 rtx result = NULL_RTX;
12891 for (i = n_replacements - 1; i >= 0; i--)
12893 rtx from = replacements[i*2];
12894 rtx to = replacements[i*2+1];
12895 unsigned from_regno, from_nregs, to_regno, new_regno;
12899 from_regno = REGNO (from);
12900 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12901 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
/* The two register ranges overlap.  */
12902 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
/* NOTE(review): the second test adds nregs, not from_nregs, to from_regno,
   which reduces it to regno > from_regno -- confirm whether from_nregs was
   intended.  */
12904 if (regno < from_regno
12905 || regno + nregs > from_regno + nregs
/* For a hard register TO, X is renumbered by the distance between TO and
   FROM.  */
12909 to_regno = REGNO (to);
12910 if (to_regno < FIRST_PSEUDO_REGISTER)
12912 new_regno = regno + to_regno - from_regno;
12913 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12916 result = gen_rtx_REG (GET_MODE (x), new_regno);
/* NOTE(review): this compares machine_mode enumerators, not mode sizes --
   confirm that the enumerator order is what is meant.  */
12918 else if (GET_MODE (x) <= GET_MODE (to))
12919 result = gen_lowpart_common (GET_MODE (x), to);
12921 result = gen_lowpart_SUBREG (GET_MODE (x), to);
/* X itself is returned when no replacement applied.  */
12924 return result ? result : x;
/* ZERO_EXTEND: same scheme as SUBREG, folding a CONST_INT operand with
   simplify_unary_operation.  */
12926 else if (GET_CODE (x) == ZERO_EXTEND)
12928 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12929 n_replacements, modify);
12931 if (CONST_INT_P (new_rtx))
12933 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12934 new_rtx, GET_MODE (XEXP (x, 0)));
12939 XEXP (x, 0) = new_rtx;
/* Any other rtx: recurse into each operand according to the rtx format
   string, covering the elements of 'E' vector operands as well.  */
12944 fmt = GET_RTX_FORMAT (GET_CODE (x));
12945 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12951 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12952 n_replacements, modify);
12956 XEXP (x, i) = new_rtx;
12958 else if (fmt[i] == 'E')
12959 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12961 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12962 n_replacements, modify);
12966 XVECEXP (x, i, j) = new_rtx;
/* Return an rtx in MODE computed from X.  By default X is wrapped in a
   TRUNCATE.  When X is itself a ZERO_EXTEND or SIGN_EXTEND, its operand is
   examined; on the one visible path, where the operand is narrower than
   MODE and either NEED_SIGN_EXT is zero or X is a SIGN_EXTEND, X's own
   extension code is used instead.  What the other two cases do is not
   visible in this listing.  */
12974 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12976 enum rtx_code code = TRUNCATE;
12978 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12980 rtx inner = XEXP (x, 0);
12981 enum machine_mode inner_mode = GET_MODE (inner);
12983 if (inner_mode == mode)
12985 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12987 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12988 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12990 code = GET_CODE (x);
12994 return gen_rtx_fmt_e (code, mode, x);
12997 /* Called via for_each_rtx after reload, to clean up truncates of
12998 registers that span multiple actual hard registers. */
13000 shmedia_cleanup_truncate (rtx *p, void *n_changes)
/* Only TRUNCATE rtxes are of interest.  */
13004 if (GET_CODE (x) != TRUNCATE)
/* A register operand wider than 8 bytes is replaced by its DImode low
   part, and the int counter that N_CHANGES points to is incremented.  */
13007 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
13009 enum machine_mode reg_mode = GET_MODE (reg);
13010 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
13011 subreg_lowpart_offset (DImode, reg_mode));
13012 *(int*) n_changes += 1;
13018 /* Load and store depend on the highpart of the address. However,
13019 set_attr_alternative does not give well-defined results before reload,
13020 so we must look at the rtl ourselves to see if any of the feeding
13021 registers is used in a memref.
13023 Called by sh_contains_memref_p via for_each_rtx. */
13025 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
/* Non-zero exactly when the visited rtx is a MEM; DATA is unused.  */
13027 return (MEM_P (*loc));
13030 /* Return true iff INSN contains a MEM. */
13032 sh_contains_memref_p (rtx insn)
/* Walk the insn pattern with for_each_rtx, using sh_contains_memref_p_1
   as the per-rtx test.  */
13034 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
13037 /* Return true iff INSN loads a banked register. */
13039 sh_loads_bankedreg_p (rtx insn)
/* Only a pattern that is a single SET is examined: its destination must be
   a register for which BANKED_REGISTER_P holds.  The return statements are
   not visible in this listing.  */
13041 if (GET_CODE (PATTERN (insn)) == SET)
13043 rtx op = SET_DEST (PATTERN(insn));
13044 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13051 /* FNADDR is the MEM expression from a call expander. Return an address
13052 to use in an SHmedia insn pattern. */
13054 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
/* Work on the address inside the MEM and note whether it is a plain
   symbol.  */
13058 fnaddr = XEXP (fnaddr, 0);
13059 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
/* PIC symbols: a non-local symbol is loaded into a fresh pseudo through
   the GOT or the GOTPLT; on the other visible path the symbol is wrapped
   with sym2PIC in Pmode.  */
13060 if (flag_pic && is_sym)
13062 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13064 rtx reg = gen_reg_rtx (Pmode);
13066 /* We must not use GOTPLT for sibcalls, because PIC_REG
13067 must be restored before the PLT code gets to run. */
13069 emit_insn (gen_symGOT2reg (reg, fnaddr));
13071 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13076 fnaddr = gen_sym2PIC (fnaddr);
13077 PUT_MODE (fnaddr, Pmode);
13080 /* If ptabs might trap, make this visible to the rest of the compiler.
13081 We generally assume that symbols pertain to valid locations, but
13082 it is possible to generate invalid symbols with asm or linker tricks.
13083 In a list of functions where each returns its successor, an invalid
13084 symbol might denote an empty list. */
13085 if (!TARGET_PT_FIXED
13086 && (!is_sym || TARGET_INVALID_SYMBOLS)
13087 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
/* Emit an explicit ptabs into a new PDImode pseudo.  */
13089 rtx tr = gen_reg_rtx (PDImode);
13091 emit_insn (gen_ptabs (tr, fnaddr));
/* Otherwise force the address into a Pmode register unless it already
   satisfies target_reg_operand.  */
13094 else if (! target_reg_operand (fnaddr, Pmode))
13095 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13099 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
   X is the rtx the class is chosen for and RCLASS the class proposed for it.
   GENERAL_REGS is returned when the test below holds: RCLASS is NO_REGS and
   X is a CONST_DOUBLE, a SYMBOL_REF or a PIC address.
   NOTE(review): the test may have a further condition that is not visible
   here, and so is the fall-back result -- confirm.  */
13101 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13103 if (rclass == NO_REGS
13105 && (CONST_DOUBLE_P (x)
13106 || GET_CODE (x) == SYMBOL_REF
13107 || PIC_ADDR_P (x)))
13108 return GENERAL_REGS;
13113 /* Implement TARGET_SECONDARY_RELOAD.
   X is the rtx being reloaded, RCLASS_I the register class involved and
   MODE the mode of the reload.  The result is the class of the intermediate
   register that is needed (R0_REGS, GENERAL_REGS or TARGET_REGS in the cases
   below) or NO_REGS.  Some cases record a reload pattern in SRI->icode.
   NOTE(review): IN_P presumably tells input reloads from output reloads
   (see the "end of input-only processing" marker) -- confirm.  */
13115 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13116 enum machine_mode mode, secondary_reload_info *sri)
13118 enum reg_class rclass = (enum reg_class) rclass_i;
/* A MEM addressed as GBR plus something needs R0 in between, unless RCLASS
   is R0_REGS already.  */
13120 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13121 && REG_P (XEXP (XEXP (x, 0), 0))
13122 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13123 return rclass == R0_REGS ? NO_REGS : R0_REGS;
/* Likewise for a MEM addressed by GBR alone.  */
13125 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13126 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13128 if (REG_P (x) && REGNO (x) == GBR_REG)
13133 if (REGCLASS_HAS_FP_REG (rclass)
13134 && ! TARGET_SHMEDIA
13135 && immediate_operand ((x), mode)
13136 && ! ((fp_zero_operand (x) || fp_one_operand (x))
13137 && mode == SFmode && fldi_ok ()))
13141 sri->icode = CODE_FOR_reload_insf__frn;
13144 sri->icode = CODE_FOR_reload_indf__frn;
13147 /* ??? If we knew that we are in the appropriate mode -
13148 single precision - we could use a reload pattern directly. */
/* FPUL is not loaded directly from MACL, MACH, T or a PLUS expression;
   a general register is used in between.  */
13153 if (rclass == FPUL_REGS
13154 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13155 || REGNO (x) == T_REG))
13156 || GET_CODE (x) == PLUS))
13157 return GENERAL_REGS;
13158 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13160 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13161 return GENERAL_REGS;
13162 else if (mode == SFmode)
13164 sri->icode = CODE_FOR_reload_insi__i_fpul;
13167 if (rclass == FPSCR_REGS
13168 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13169 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13170 return GENERAL_REGS;
13171 if (REGCLASS_HAS_FP_REG (rclass)
13173 && immediate_operand (x, mode)
13174 && x != CONST0_RTX (GET_MODE (x))
13175 && GET_MODE (x) != V4SFmode)
13176 return GENERAL_REGS;
13177 if ((mode == QImode || mode == HImode)
13178 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13180 sri->icode = ((mode == QImode)
13181 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13184 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13185 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13186 return TARGET_REGS;
13187 } /* end of input-only processing. */
13189 if (((REGCLASS_HAS_FP_REG (rclass)
13191 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13192 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13193 && TARGET_FMOVD))))
13194 || (REGCLASS_HAS_GENERAL_REG (rclass)
13196 && FP_REGISTER_P (REGNO (x))))
13197 && ! TARGET_SHMEDIA
13198 && (mode == SFmode || mode == SImode))
13200 if ((rclass == FPUL_REGS
13201 || (REGCLASS_HAS_FP_REG (rclass)
13202 && ! TARGET_SHMEDIA && mode == SImode))
13205 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13206 || REGNO (x) == T_REG
13207 || system_reg_operand (x, VOIDmode)))))
13209 if (rclass == FPUL_REGS)
13210 return GENERAL_REGS;
/* For TARGET_REGS (and SIBCALL_REGS on SHmedia), an X that neither
   satisfies constraint Csy nor is a general register goes through
   GENERAL_REGS.  */
13213 if ((rclass == TARGET_REGS
13214 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13215 && !satisfies_constraint_Csy (x)
13216 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13217 return GENERAL_REGS;
/* For MAC_REGS and PR_REGS, a non-general register of a different class
   goes through GENERAL_REGS.  */
13218 if ((rclass == MAC_REGS || rclass == PR_REGS)
13219 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13220 && rclass != REGNO_REG_CLASS (REGNO (x)))
13221 return GENERAL_REGS;
13222 if (rclass != GENERAL_REGS && REG_P (x)
13223 && TARGET_REGISTER_P (REGNO (x)))
13224 return GENERAL_REGS;
13226 /* If here fall back to loading FPUL register through general registers.
13227 This case can happen when movsi_ie insn is picked initially to
13228 load/store the FPUL register from/to another register, and then the
13229 other register is allocated on the stack. */
13230 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13231 return GENERAL_REGS;
13233 /* Force mov.b / mov.w displacement addressing insn to use R0 as
   the other operand.
13235 On SH2A could also just leave it alone here, which would result in a
13236 4 byte move insn being generated instead. However, for this to work
13237 the insns must have the appropriate alternatives. */
13238 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13239 && satisfies_constraint_Sdd (x)
13240 && sh_disp_addr_displacement (x)
13241 <= sh_max_mov_insn_displacement (mode, false))
13244 /* When reload is trying to address a QImode or HImode subreg on the stack,
13245 force any subreg byte into R0_REGS, as this is going to become a
13246 displacement address.
13247 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13248 is on the stack, the memref to it might already require a displacement
13249 and that has to be added to the final address. At this point we don't
13250 know the cumulative displacement so we assume the worst case. */
13251 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13252 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
/* Adjust the register tables for the selected target: fixed_regs,
   call_used_regs, call_really_used_regs, regno_reg_class and the contents
   of the FP0_REGS and SIBCALL_REGS classes are updated here.
   NOTE(review): presumably the TARGET_CONDITIONAL_REGISTER_USAGE hook --
   confirm.  */
13259 sh_conditional_register_usage (void)
/* Registers that fail VALID_REGISTER_P become fixed and call-used.  */
13262 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13263 if (! VALID_REGISTER_P (regno))
13264 fixed_regs[regno] = call_used_regs[regno] = 1;
13265 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13268 call_used_regs[FIRST_GENERAL_REG + 8]
13269 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13270 call_really_used_regs[FIRST_GENERAL_REG + 8]
13271 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
/* On SHmedia the first general register is classed as GENERAL_REGS, the
   first FP register as FP_REGS, and FP0_REGS is emptied.  */
13273 if (TARGET_SHMEDIA)
13275 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13276 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13277 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13281 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13282 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13284 /* Renesas saves and restores mac registers on call. */
13285 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13287 call_really_used_regs[MACH_REG] = 0;
13288 call_really_used_regs[MACL_REG] = 0;
/* SIBCALL_REGS receives the registers that are not fixed and are really
   call-used: the first loop scans the target registers, the second one the
   general registers.  */
13291 if (TARGET_SHMEDIA)
13293 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13294 if (! fixed_regs[regno] && call_really_used_regs[regno])
13295 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13298 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13299 if (! fixed_regs[regno] && call_really_used_regs[regno])
13300 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13303 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13305 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs.
   MODE is the mode in which constant X is wanted.
   With TARGET_SHMEDIA, X is rejected only when all of these hold: MODE is
   DFmode or a vector float mode, X is not the zero constant of MODE,
   TARGET_SHMEDIA_FPU is set and TARGET_SHMEDIA64 is not.
   Otherwise anything that is not a CONST_DOUBLE is accepted, and a
   CONST_DOUBLE is accepted when MODE is DFmode, SFmode or DImode, or when
   X itself has VOIDmode.  */
13307 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
13309 return (TARGET_SHMEDIA
13310 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13311 || x == CONST0_RTX (mode)
13312 || !TARGET_SHMEDIA_FPU
13313 || TARGET_SHMEDIA64)
13314 : (GET_CODE (x) != CONST_DOUBLE
13315 || mode == DFmode || mode == SFmode
13316 || mode == DImode || GET_MODE (x) == VOIDmode));
/* The division strategy in effect; starts out as SH_DIV_STRATEGY_DEFAULT.  */
13319 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
/* Set up the sync libfuncs by calling init_sync_libfuncs with
   UNITS_PER_WORD as its argument.  */
13322 sh_init_sync_libfuncs (void)
13324 init_sync_libfuncs (UNITS_PER_WORD);
13327 /* Return true if it is appropriate to emit `ret' instructions in the
13328 body of a function. */
13330 sh_can_use_simple_return_p (void)
13332 HARD_REG_SET live_regs_mask;
13335 /* Some targets require special return insns. */
13337 || (TARGET_SHCOMPACT
13338 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
/* This test applies before reload has completed, or when a frame pointer
   is needed.  */
13341 if (! reload_completed || frame_pointer_needed)
13344 /* Moving prologue around doesn't reduce the size. */
13345 if (optimize_function_for_size_p (cfun))
13348 /* Finally, allow for pr save. */
13349 d = calc_live_regs (&live_regs_mask);
/* NOTE(review): the 4 presumably stands for the size of the PR save
   mentioned above -- confirm.  */
13351 if (rounded_frame_size (d) > 4)
13357 /*------------------------------------------------------------------------------
13358 Address mode optimization support code
*/
/* Displacement type used by the address mode optimization code, together
   with its smallest and largest values.  INVALID_DISP, which equals
   MAX_DISP, marks a base_reg_disp whose displacement is not usable (see
   base_reg_disp::is_reg and base_reg_disp::is_disp).  */
13361 typedef HOST_WIDE_INT disp_t;
13362 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13363 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13364 static const disp_t INVALID_DISP = MAX_DISP;
13366 /* A memory reference which is described by a base register and a
displacement. */
13368 class base_reg_disp
/* Construct from base register BR (NULL is passed when there is none) and
   displacement D.  */
13371 base_reg_disp (rtx br, disp_t d);
/* True if a base register is present and the displacement is valid.  */
13373 bool is_reg (void) const;
/* True if there is no base register but the displacement is valid.  */
13374 bool is_disp (void) const;
/* NOTE(review): presumably plain accessors for the base register and the
   displacement; their bodies are not visible here -- confirm.  */
13375 rtx reg (void) const;
13376 disp_t disp (void) const;
/* Initialize the members reg_ and disp_ from BR and D.  */
13384 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13385 : reg_ (br), disp_ (d)
/* True if a base register is set and the displacement is not
   INVALID_DISP.  */
13390 base_reg_disp::is_reg (void) const
13392 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
/* True if no base register is set and the displacement is not
   INVALID_DISP, i.e. the value is a displacement only.  */
13396 base_reg_disp::is_disp (void) const
13398 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
/* NOTE(review): body not visible here; presumably returns the stored base
   register reg_ -- confirm.  */
13402 base_reg_disp::reg (void) const
/* NOTE(review): body not visible here; presumably returns the stored
   displacement disp_ -- confirm.  */
13408 base_reg_disp::disp (void) const
13413 /* Find the base register and calculate the displacement for a given
address expression 'x'.
13415 This is done by walking the insn list backwards and following SET insns
13416 that set the value of the specified reg 'x'.
   INSN is the insn the backward walk starts from.  DISP is the displacement
   accumulated so far.  BASE_REG, when not NULL, is the register that is
   used as the base if the walk ends at a hard reg or nothing useful is
   found.  */
13417 static base_reg_disp
13418 sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
/* GBR itself ends the search.  */
13422 if (REGNO (x) == GBR_REG)
13423 return base_reg_disp (x, disp);
13425 /* We've reached a hard-reg. This is probably the point where
13426 function args are copied to pseudos. Do not go any further and
13427 stick to the pseudo. If the original mem addr was in a hard reg
13428 from the beginning, it will become the base reg. */
13429 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13430 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13432 /* Try to find the previous insn that sets the reg. */
13433 for (rtx i = prev_nonnote_insn (insn); i != NULL;
13434 i = prev_nonnote_insn (i))
13436 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
13440 if (!NONJUMP_INSN_P (i))
13443 rtx p = PATTERN (i);
13444 if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
13445 && REGNO (XEXP (p, 0)) == REGNO (x))
13447 /* If the recursion can't find out any more details about the
13448 source of the set, then this reg becomes our new base reg. */
13449 return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
13453 /* When here, no previous insn was found that sets the reg.
13454 The input reg is already the base reg. */
13455 return base_reg_disp (x, disp);
13458 else if (GET_CODE (x) == PLUS)
/* Both operands are analyzed on their own, each starting with a zero
   displacement and no fallback base register.  */
13460 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13461 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13463 /* Either left or right val must be a reg.
13464 We don't handle the case of 'reg + reg' here. */
13465 if (left_val.is_reg () && right_val.is_disp ())
13466 return base_reg_disp (left_val.reg (), left_val.disp ()
13467 + right_val.disp () + disp);
13468 else if (right_val.is_reg () && left_val.is_disp ())
13469 return base_reg_disp (right_val.reg (), right_val.disp ()
13470 + left_val.disp () + disp);
13472 return base_reg_disp (base_reg, disp);
/* A constant contributes to the displacement only; there is no base
   register.  */
13475 else if (CONST_INT_P (x))
13476 return base_reg_disp (NULL, disp + INTVAL (x));
13478 /* Didn't find anything useful. */
13479 return base_reg_disp (base_reg, disp);
13482 /* Given an insn and a memory operand, try to find an equivalent GBR
13483 based memory address and return the corresponding new memory address.
13484 Return NULL_RTX if not found. */
13486 sh_find_equiv_gbr_addr (rtx insn, rtx mem)
13491 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13492 if (side_effects_p (XEXP (mem, 0)))
/* Trace the address of MEM back to a base register plus displacement.  */
13495 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
/* The result is usable only if the base register is GBR and the
   displacement is accepted by gbr_displacement for the mode of MEM.  The
   new address is built as (plus:SI (reg:SI GBR_REG) (const_int disp)).  */
13497 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13499 rtx disp = GEN_INT (gbr_disp.disp ());
13500 if (gbr_displacement (disp, GET_MODE (mem)))
13501 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13507 /*------------------------------------------------------------------------------
13508 Manual insn combine support code.
*/
13511 /* Given a reg rtx and a start insn, try to find the insn that sets the
13512 specified reg by using the specified insn stepping function, such as
13513 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
of the reg set. */
/* REG is the register whose setter is looked for, INSN the insn the search
   starts from and STEPFUNC the function that yields the next insn to
   examine.  The result starts out with INSN and with NULL_RTX for both the
   set rtx and its source; the set rtx comes from set_of and the source is
   operand 1 of that rtx.  */
13516 sh_find_set_of_reg (rtx reg, rtx insn, rtx_insn *(*stepfunc)(rtx))
13519 result.insn = insn;
13520 result.set_rtx = NULL_RTX;
13521 result.set_src = NULL_RTX;
/* Nothing to search for without a REG and a start insn.  */
13523 if (!REG_P (reg) || insn == NULL_RTX)
13526 for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
13527 result.insn = stepfunc (result.insn))
13529 if (BARRIER_P (result.insn))
13531 if (!NONJUMP_INSN_P (result.insn))
13533 if (reg_set_p (reg, result.insn))
13535 result.set_rtx = set_of (reg, result.insn);
/* Only a rtx with code SET has its source extracted.  */
13537 if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
13540 result.set_src = XEXP (result.set_rtx, 1);
13548 /* Given an op rtx and an insn, try to find out whether the result of the
13549 specified op consists only of logical operations on T bit stores. */
13551 sh_is_logical_t_store_expr (rtx op, rtx insn)
/* OP has to be a logical operator in SImode.  */
13553 if (!logical_operator (op, SImode))
13556 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13557 int op_is_t_count = 0;
13559 for (int i = 0; i < 2; ++i)
/* The operand may be the T bit or the negated T bit itself.  */
13561 if (t_reg_operand (ops[i], VOIDmode)
13562 || negt_reg_operand (ops[i], VOIDmode))
/* Otherwise the insn that set the operand is looked up; its source may be
   a T bit store or, recursively, a logical operation on T bit stores.  */
13567 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13568 prev_nonnote_insn_bb);
13569 if (op_set.set_src == NULL_RTX)
13572 if (t_reg_operand (op_set.set_src, VOIDmode)
13573 || negt_reg_operand (op_set.set_src, VOIDmode)
13574 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
/* Both operands have to qualify.  */
13579 return op_is_t_count == 2;
13582 /* Given the operand that is extended in a sign/zero extend insn, and the
13583 insn, try to figure out whether the sign/zero extension can be replaced
13584 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13585 NULL_RTX otherwise. */
13587 sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
/* A plain REG is used as it is (the self-assignment has no effect); for a
   SUBREG of a REG the inner REG is used.  */
13589 if (REG_P (extended_op))
13590 extended_op = extended_op;
13591 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13592 extended_op = SUBREG_REG (extended_op);
13596 /* Reg moves must be of the same mode. */
13597 if (GET_MODE (extended_op) != SImode)
/* Find out what the register was set from.  */
13600 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13601 if (s.set_src == NULL_RTX)
/* A register set directly from the T bit or the negated T bit can be
   used as the replacement.  */
13604 if (t_reg_operand (s.set_src, VOIDmode)
13605 || negt_reg_operand (s.set_src, VOIDmode))
13606 return extended_op;
13608 /* If the zero extended reg was formed by a logical operation, check the
13609 operands of the logical operation. If both originated from T bit
13610 stores the zero extension can be eliminated. */
13611 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13612 return extended_op;
/* Emit the insns for a mode change from PREV_MODE to MODE; ENTITY is
   unused.  With TARGET_SH4A_FP or TARGET_SH4_300, and when PREV_MODE is
   neither FP_MODE_NONE nor equal to MODE, the toggle insns
   (gen_toggle_pr / gen_toggle_sz) are used; fpscr_set_from_mem (MODE,
   REGS_LIVE) is the other way of setting the mode.
   NOTE(review): the conditions that select between the individual insns are
   not fully visible here; presumably this is the TARGET_MODE_EMIT hook --
   confirm.  */
13618 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
13619 int prev_mode, HARD_REG_SET regs_live)
13621 if ((TARGET_SH4A_FP || TARGET_SH4_300)
13622 && prev_mode != FP_MODE_NONE && prev_mode != mode)
13624 emit_insn (gen_toggle_pr ());
13626 emit_insn (gen_toggle_sz ());
13629 fpscr_set_from_mem (mode, regs_live);
/* Return the fp_mode attribute of INSN when INSN is recognized, and
   FP_MODE_NONE otherwise.  ENTITY is unused.  */
13633 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx insn)
13635 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
/* With TARGET_HITACHI, a recognized INSN whose fp_set attribute is not
   FP_SET_NONE yields that attribute value.  ENTITY is unused.
   NOTE(review): the result for all other cases is not visible here;
   presumably MODE is passed through unchanged -- confirm.  */
13639 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx insn)
13641 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
13642 get_attr_fp_set (insn) != FP_SET_NONE)
13643 return (int) get_attr_fp_set (insn);
/* Return NORMAL_MODE (ENTITY).
   NOTE(review): presumably the mode assumed at function entry -- confirm.  */
13649 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
13651 return NORMAL_MODE (entity);
/* Return FP_MODE_NONE when sh_cfun_attr_renesas_p () holds, and
   NORMAL_MODE (ENTITY) otherwise.
   NOTE(review): presumably the mode required at function exit -- confirm.  */
13655 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
13657 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
/* Return FP_MODE_SINGLE when (TARGET_FPU_SINGLE != 0) XOR N is nonzero, and
   FP_MODE_DOUBLE otherwise; the XOR is evaluated before the conditional.
   ENTITY is unused.  */
13661 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
13663 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);