+2000-03-24 Jakub Jelinek <jakub@redhat.com>
+
+ * sibcall.c (skip_copy_to_return_value): Use OUTGOING_REGNO for
+ comparison if regno's are equal.
+ * calls.c (initialize_argument_information): Add ecf_flags argument.
+ Use FUNCTION_INCOMING_ARG if available and ECF_SIBCALL.
+ (expand_call): Update caller.
+ Avoid making a sibling call if argument size of the callee is larger
+ than argument size of the caller.
+ Call hard_function_value with outgoing set if in sibcall pass.
+ Use FUNCTION_INCOMING_ARG if available and ECF_SIBCALL.
+
+ * final.c (permitted_reg_in_leaf_functions, only_leaf_regs_used):
+ Change LEAF_REGISTERS from an array initializer to actual array
+ identifier. Move static global variable into the function.
+ (leaf_function_p): Allow SIBLING_CALL_P calls even outside of
+ sequences for leaf functions.
+ * global.c (global_alloc): Likewise.
+ * tm.texi (LEAF_REGISTERS): Update documentation.
+
+ * config/sparc/sparc.h (CONDITIONAL_REGISTER_USAGE): Remove the ugly
+ TARGET_FLAT leaf disabling hack.
+ (LEAF_REGISTERS): Changed from an array initializer to actual array
+ identifier to avoid duplication and remove the above hack.
+ (FUNCTION_OK_FOR_SIBCALL): Define.
+ * config/sparc/sparc.md (sibcall): New attr type. Use it almost
+ always like call attribute.
+ (eligible_for_sibcall_delay): New attribute.
+ (sibcall): New delay type.
+ (sibcall, sibcall_value, sibcall_epilogue): New expands.
+ (sibcall_symbolic_sp32, sibcall_symbolic_sp64,
+ sibcall_value_symbolic_sp32, sibcall_value_symbolic_sp64): New insns.
+ * config/sparc/sparc.c (sparc_leaf_regs): New array.
+ (eligible_for_sibcall_delay, output_restore_regs, output_sibcall):
+ New functions.
+ (output_function_epilogue): Move part of the code into
+ output_restore_regs.
+ (ultra_code_from_mask, ultrasparc_sched_reorder): Handle
+ TYPE_SIBCALL.
+ * sparc-protos.h (output_sibcall, eligible_for_sibcall_delay): New
+ prototypes.
+
Fri Mar 24 13:49:45 2000 Jeffrey A Law (law@cygnus.com)
* integrate.c (save_for_inline_nocopy): Clear in_nonparm_insns here.
int, tree, tree,
CUMULATIVE_ARGS *,
int, rtx *, int *,
- int *, int *));
+ int *, int *, int));
static void compute_argument_addresses PARAMS ((struct arg_data *,
rtx, int));
static rtx rtx_for_function_call PARAMS ((tree, tree));
initialize_argument_information (num_actuals, args, args_size, n_named_args,
actparms, fndecl, args_so_far,
reg_parm_stack_space, old_stack_level,
- old_pending_adj, must_preallocate, is_const)
+ old_pending_adj, must_preallocate, is_const,
+ ecf_flags)
int num_actuals ATTRIBUTE_UNUSED;
struct arg_data *args;
struct args_size *args_size;
int *old_pending_adj;
int *must_preallocate;
int *is_const;
+ int ecf_flags;
{
/* 1 if scanning parms front to back, -1 if scanning back to front. */
int inc;
args[i].unsignedp = unsignedp;
args[i].mode = mode;
- args[i].reg = FUNCTION_ARG (*args_so_far, mode, type,
- argpos < n_named_args);
+
+#ifdef FUNCTION_INCOMING_ARG
+ /* If this is a sibling call and the machine has register windows, the
+ register window has to be unwound before calling the routine, so
+ arguments have to go into the incoming registers. */
+ if (ecf_flags & ECF_SIBCALL)
+ args[i].reg = FUNCTION_INCOMING_ARG (*args_so_far, mode, type,
+ argpos < n_named_args);
+ else
+#endif
+ args[i].reg = FUNCTION_ARG (*args_so_far, mode, type,
+ argpos < n_named_args);
+
#ifdef FUNCTION_ARG_PARTIAL_NREGS
if (args[i].reg)
args[i].partial
call expansion. */
int save_pending_stack_adjust;
rtx insns;
- rtx before_call;
+ rtx before_call, next_arg_reg;
if (pass == 0)
{
n_named_args, actparms, fndecl,
&args_so_far, reg_parm_stack_space,
&old_stack_level, &old_pending_adj,
- &must_preallocate, &is_const);
+ &must_preallocate, &is_const,
+ (pass == 0) ? ECF_SIBCALL : 0);
#ifdef FINAL_REG_PARM_STACK_SPACE
reg_parm_stack_space = FINAL_REG_PARM_STACK_SPACE (args_size.constant,
sibcall_failure = 1;
}
+ if (args_size.constant > current_function_args_size)
+ {
+ /* If this function requires more stack slots than the current
+ function, we cannot change it into a sibling call. */
+ sibcall_failure = 1;
+ }
+
/* Compute the actual size of the argument block required. The variable
and constant sizes must be combined, the size may have to be rounded,
and there may be a minimum required size. When generating a sibcall
{
if (pcc_struct_value)
valreg = hard_function_value (build_pointer_type (TREE_TYPE (exp)),
- fndecl, 0);
+ fndecl, (pass == 0));
else
- valreg = hard_function_value (TREE_TYPE (exp), fndecl, 0);
+ valreg = hard_function_value (TREE_TYPE (exp), fndecl, (pass == 0));
}
/* Precompute all register parameters. It isn't safe to compute anything
later safely search backwards to find the CALL_INSN. */
before_call = get_last_insn ();
+ /* Set up next argument register. For sibling calls on machines
+ with register windows this should be the incoming register. */
+#ifdef FUNCTION_INCOMING_ARG
+ if (pass == 0)
+ next_arg_reg = FUNCTION_INCOMING_ARG (args_so_far, VOIDmode,
+ void_type_node, 1);
+ else
+#endif
+ next_arg_reg = FUNCTION_ARG (args_so_far, VOIDmode,
+ void_type_node, 1);
+
/* All arguments and registers used for the call must be set up by
now! */
/* Generate the actual call instruction. */
emit_call_1 (funexp, fndecl, funtype, unadjusted_args_size,
args_size.constant, struct_value_size,
- FUNCTION_ARG (args_so_far, VOIDmode, void_type_node, 1),
- valreg, old_inhibit_defer_pop, call_fusage,
+ next_arg_reg, valreg, old_inhibit_defer_pop, call_fusage,
((is_const ? ECF_IS_CONST : 0)
| (nothrow ? ECF_NOTHROW : 0)
| (pass == 0 ? ECF_SIBCALL : 0)));
{
tail_call_insns = insns;
- /* If the current function's argument block is not large enough
- to hold the outoing arguments, or we encountered some other
- situation we couldn't handle, zero out the sequence. */
- if (current_function_args_size < args_size.constant
- || sibcall_failure)
+ /* If something prevents making this a sibling call,
+ zero out the sequence. */
+ if (sibcall_failure)
tail_call_insns = NULL_RTX;
/* Restore the pending stack adjustment now that we have
extern int sparc_absnegfloat_split_legitimate PARAMS ((rtx, rtx));
extern char *output_cbranch PARAMS ((rtx, int, int, int, int, rtx));
extern const char *output_return PARAMS ((rtx *));
+extern const char *output_sibcall PARAMS ((rtx, rtx));
extern char *output_v9branch PARAMS ((rtx, int, int, int, int, int, rtx));
extern void emit_v9_brxx_insn PARAMS ((enum rtx_code, rtx, rtx));
extern void output_double_int PARAMS ((FILE *, rtx));
extern int data_segment_operand PARAMS ((rtx, enum machine_mode));
extern int eligible_for_epilogue_delay PARAMS ((rtx, int));
extern int eligible_for_return_delay PARAMS ((rtx));
+extern int eligible_for_sibcall_delay PARAMS ((rtx));
extern int emit_move_sequence PARAMS ((rtx, enum machine_mode));
extern int extend_op PARAMS ((rtx, enum machine_mode));
extern int fcc_reg_operand PARAMS ((rtx, enum machine_mode));
88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98, 99, 100};
+/* Vector, indexed by hard register number, which contains 1
+ for a register that is allowable in a candidate for leaf
+ function treatment. */
+/* This replaces the old LEAF_REGISTERS array initializer that lived in
+ sparc.h; sparc.h now just declares it and defines LEAF_REGISTERS to
+ this identifier. Unlike the old initializer, %g7 (hard reg 7) is
+ marked usable here, because the -mflat hack of making %g7 a global
+ register to disable leaf functions is gone -- -mflat now clears this
+ array directly in CONDITIONAL_REGISTER_USAGE instead. Each row below
+ covers 8 hard registers: %g0-%g7, %o0-%o7, %l0-%l7, %i0-%i7, then the
+ floating point registers. */
+char sparc_leaf_regs[] =
+{ 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 0, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1};
+
#endif
/* Name of where we pretend to think the frame pointer points.
return 0;
}
+/* Return nonzero if TRIAL can go into the sibling call
+ delay slot. */
+
+int
+eligible_for_sibcall_delay (trial)
+ rtx trial;
+{
+ rtx pat, src;
+
+ /* Only a single SET insn can fill a delay slot. */
+ if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
+ return 0;
+
+ /* The slot holds exactly one instruction, so multi-insn patterns
+ (length != 1) are out. NOTE(review): profile_block_flag == 2
+ presumably means basic-block profiling is active and must inhibit
+ this -- confirm against the rest of sparc.c. */
+ if (get_attr_length (trial) != 1 || profile_block_flag == 2)
+ return 0;
+
+ pat = PATTERN (trial);
+
+ if (current_function_uses_only_leaf_regs)
+ {
+ /* If the tail call is done using the call instruction,
+ we have to restore %o7 in the delay slot. */
+ if (TARGET_ARCH64 && ! TARGET_CM_MEDLOW)
+ return 0;
+
+ /* %g1 is used to build the function address */
+ if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
+ return 0;
+
+ return 1;
+ }
+
+ /* Otherwise, only operations which can be done in tandem with
+ a `restore' insn can go into the delay slot. The destination
+ must be one of the %i registers (hard regs 24-31), which is
+ what a restore can write. */
+ if (GET_CODE (SET_DEST (pat)) != REG
+ || REGNO (SET_DEST (pat)) < 24
+ || REGNO (SET_DEST (pat)) >= 32)
+ return 0;
+
+ /* If it mentions %o7, it can't go in, because sibcall will clobber it
+ in most cases. */
+ if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
+ return 0;
+
+ src = SET_SRC (pat);
+
+ /* The following forms are the source operands a restore can
+ compute: plain arithmetic operands, register+register or
+ register+immediate sums, a LO_SUM, or a doubling shift. */
+ if (arith_operand (src, GET_MODE (src)))
+ {
+ if (TARGET_ARCH64)
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+ else
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
+ }
+
+ else if (arith_double_operand (src, GET_MODE (src)))
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+
+ else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
+ && register_operand (src, SFmode))
+ return 1;
+
+ else if (GET_CODE (src) == PLUS
+ && arith_operand (XEXP (src, 0), SImode)
+ && arith_operand (XEXP (src, 1), SImode)
+ && (register_operand (XEXP (src, 0), SImode)
+ || register_operand (XEXP (src, 1), SImode)))
+ return 1;
+
+ else if (GET_CODE (src) == PLUS
+ && arith_double_operand (XEXP (src, 0), DImode)
+ && arith_double_operand (XEXP (src, 1), DImode)
+ && (register_operand (XEXP (src, 0), DImode)
+ || register_operand (XEXP (src, 1), DImode)))
+ return 1;
+
+ else if (GET_CODE (src) == LO_SUM
+ && ! TARGET_CM_MEDMID
+ && ((register_operand (XEXP (src, 0), SImode)
+ && immediate_operand (XEXP (src, 1), SImode))
+ || (TARGET_ARCH64
+ && register_operand (XEXP (src, 0), DImode)
+ && immediate_operand (XEXP (src, 1), DImode))))
+ return 1;
+
+ /* A shift left by 1 is handled as "restore %r, %r, %dest"
+ (reg + reg) in output_sibcall. */
+ else if (GET_CODE (src) == ASHIFT
+ && (register_operand (XEXP (src, 0), SImode)
+ || register_operand (XEXP (src, 0), DImode))
+ && XEXP (src, 1) == const1_rtx)
+ return 1;
+
+ return 0;
+}
+
static int
check_return_regs (x)
rtx x;
}
}
+/* Output code to restore any call saved registers. */
+
+/* FILE is the assembly output stream; LEAF_FUNCTION is nonzero when the
+ current function was treated as a leaf function. Factored out of
+ output_function_epilogue so output_sibcall can share it. */
+
+static void
+output_restore_regs (file, leaf_function)
+ FILE *file;
+ int leaf_function;
+{
+ int offset, n_regs;
+ const char *base;
+
+ /* If the save area offset does not fit in a 13-bit signed immediate
+ (and the last slot -- possibly a double -- must still be reachable,
+ hence 4096 - 8), materialize it in %g1 and address relative to
+ that instead of the frame base. */
+ offset = -apparent_fsize + frame_base_offset;
+ if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
+ {
+ build_big_number (file, offset, "%g1");
+ fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
+ base = "%g1";
+ offset = 0;
+ }
+ else
+ {
+ base = frame_base_name;
+ }
+
+ /* Restore the integer registers first, then the float registers,
+ continuing the stack offset where the integer pass stopped. */
+ n_regs = 0;
+ if (TARGET_EPILOGUE && ! leaf_function)
+ /* ??? Originally saved regs 0-15 here. */
+ n_regs = restore_regs (file, 0, 8, base, offset, 0);
+ else if (leaf_function)
+ /* ??? Originally saved regs 0-31 here. */
+ n_regs = restore_regs (file, 0, 8, base, offset, 0);
+ if (TARGET_EPILOGUE)
+ restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
+}
+
/* Output code for the function epilogue. */
void
goto output_vectors;
}
- /* Restore any call saved registers. */
if (num_gfregs)
- {
- int offset, n_regs;
- const char *base;
-
- offset = -apparent_fsize + frame_base_offset;
- if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
- {
- build_big_number (file, offset, "%g1");
- fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
- base = "%g1";
- offset = 0;
- }
- else
- {
- base = frame_base_name;
- }
-
- n_regs = 0;
- if (TARGET_EPILOGUE && ! leaf_function)
- /* ??? Originally saved regs 0-15 here. */
- n_regs = restore_regs (file, 0, 8, base, offset, 0);
- else if (leaf_function)
- /* ??? Originally saved regs 0-31 here. */
- n_regs = restore_regs (file, 0, 8, base, offset, 0);
- if (TARGET_EPILOGUE)
- restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
- }
+ output_restore_regs (file, leaf_function);
/* Work out how to skip the caller's unimp instruction if required. */
if (leaf_function)
output_vectors:
sparc_output_deferred_case_vectors ();
}
+
+/* Output a sibling call. */
+
+/* INSN is the sibcall (or sibcall_value) insn being output and
+ CALL_OPERAND is its symbolic call target. All assembly is written
+ to asm_out_file directly; the returned template is therefore the
+ empty string. Any filled delay slot that the chosen code sequence
+ would clobber is emitted early and then neutralized by replacing
+ its pattern with a blockage. */
+
+const char *
+output_sibcall (insn, call_operand)
+ rtx insn, call_operand;
+{
+ int leaf_regs = current_function_uses_only_leaf_regs;
+ rtx operands[3];
+ int delay_slot = dbr_sequence_length () > 0;
+
+ if (num_gfregs)
+ {
+ /* Call to restore global regs might clobber
+ the delay slot. Instead of checking for this
+ output the delay slot now. */
+ if (delay_slot)
+ {
+ rtx delay = NEXT_INSN (insn);
+
+ if (! delay)
+ abort ();
+
+ final_scan_insn (delay, asm_out_file, 1, 0, 1);
+ PATTERN (delay) = gen_blockage ();
+ INSN_CODE (delay) = -1;
+ delay_slot = 0;
+ }
+ output_restore_regs (asm_out_file, leaf_regs);
+ }
+
+ operands[0] = call_operand;
+
+ if (leaf_regs)
+ {
+ /* On 32-bit or medlow we can put the frame deallocation into the
+ jmpl delay slot ourselves; otherwise any user delay slot insn
+ must be flushed out first. */
+ int spare_slot = (TARGET_ARCH32 || TARGET_CM_MEDLOW);
+ int size = 0;
+
+ if ((actual_fsize || ! spare_slot) && delay_slot)
+ {
+ rtx delay = NEXT_INSN (insn);
+
+ if (! delay)
+ abort ();
+
+ final_scan_insn (delay, asm_out_file, 1, 0, 1);
+ PATTERN (delay) = gen_blockage ();
+ INSN_CODE (delay) = -1;
+ delay_slot = 0;
+ }
+ if (actual_fsize)
+ {
+ if (actual_fsize <= 4096)
+ size = actual_fsize;
+ else if (actual_fsize <= 8192)
+ {
+ fputs ("\tsub\t%sp, -4096, %sp\n", asm_out_file);
+ size = actual_fsize - 4096;
+ }
+ else if ((actual_fsize & 0x3ff) == 0)
+ fprintf (asm_out_file,
+ "\tsethi\t%%hi(%d), %%g1\n\tadd\t%%sp, %%g1, %%sp\n",
+ actual_fsize);
+ else
+ {
+ fprintf (asm_out_file,
+ "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n",
+ actual_fsize, actual_fsize);
+ /* fputs does no %-conversion, so single '%'s are needed
+ here; the "%%" sequences only collapse to '%' when
+ passed through fprintf, as above. */
+ fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
+ }
+ }
+ if (spare_slot)
+ {
+ output_asm_insn ("sethi\t%%hi(%a0), %%g1", operands);
+ output_asm_insn ("jmpl\t%%g1 + %%lo(%a0), %%g0", operands);
+ if (size)
+ fprintf (asm_out_file, "\t sub\t%%sp, -%d, %%sp\n", size);
+ else if (! delay_slot)
+ fputs ("\t nop\n", asm_out_file);
+ }
+ else
+ {
+ if (size)
+ fprintf (asm_out_file, "\tsub\t%%sp, -%d, %%sp\n", size);
+ /* Save and restore %o7 around the call so the leaf function's
+ return address is preserved across the tail call. */
+ output_asm_insn ("mov\t%%o7, %%g1", operands);
+ output_asm_insn ("call\t%a0, 0", operands);
+ output_asm_insn (" mov\t%%g1, %%o7", operands);
+ }
+ return "";
+ }
+
+ /* Non-leaf case: emit the call and fold the delay slot insn (already
+ validated by eligible_for_sibcall_delay) into the restore. */
+ output_asm_insn ("call\t%a0, 0", operands);
+ if (delay_slot)
+ {
+ rtx delay = NEXT_INSN (insn), pat;
+
+ if (! delay)
+ abort ();
+
+ pat = PATTERN (delay);
+ if (GET_CODE (pat) != SET)
+ abort ();
+
+ operands[0] = SET_DEST (pat);
+ pat = SET_SRC (pat);
+ switch (GET_CODE (pat))
+ {
+ case PLUS:
+ operands[1] = XEXP (pat, 0);
+ operands[2] = XEXP (pat, 1);
+ output_asm_insn (" restore %r1, %2, %Y0", operands);
+ break;
+ case LO_SUM:
+ operands[1] = XEXP (pat, 0);
+ operands[2] = XEXP (pat, 1);
+ output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
+ break;
+ case ASHIFT:
+ /* Shift left by 1 becomes reg + reg. */
+ operands[1] = XEXP (pat, 0);
+ output_asm_insn (" restore %r1, %r1, %Y0", operands);
+ break;
+ default:
+ operands[1] = pat;
+ output_asm_insn (" restore %%g0, %1, %Y0", operands);
+ break;
+ }
+ PATTERN (delay) = gen_blockage ();
+ INSN_CODE (delay) = -1;
+ }
+ else
+ fputs ("\t restore\n", asm_out_file);
+ return "";
+}
\f
/* Functions for handling argument passing.
return IEU0;
else if (type_mask & (TMASK (TYPE_COMPARE) |
TMASK (TYPE_CALL) |
+ TMASK (TYPE_SIBCALL) |
TMASK (TYPE_UNCOND_BRANCH)))
return IEU1;
else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
/* If we are not in the process of emptying out the pipe, try to
obtain an instruction which must be the first in it's group. */
ip = ultra_find_type ((TMASK (TYPE_CALL) |
+ TMASK (TYPE_SIBCALL) |
TMASK (TYPE_CALL_NO_DELAY_SLOT) |
TMASK (TYPE_UNCOND_BRANCH)),
ready, this_insn);
%fp, but output it as %i7. */ \
fixed_regs[31] = 1; \
reg_names[FRAME_POINTER_REGNUM] = "%i7"; \
- /* ??? This is a hack to disable leaf functions. */ \
- global_regs[7] = 1; \
+ /* Disable leaf functions */ \
+ bzero (sparc_leaf_regs, FIRST_PSEUDO_REGISTER); \
} \
if (profile_block_flag) \
{ \
#define ORDER_REGS_FOR_LOCAL_ALLOC order_regs_for_local_alloc ()
-/* ??? %g7 is not a leaf register to effectively #undef LEAF_REGISTERS when
- -mflat is used. Function only_leaf_regs_used will return 0 if a global
- register is used and is not permitted in a leaf function. We make %g7
- a global reg if -mflat and voila. Since %g7 is a system register and is
- fixed it won't be used by gcc anyway. */
-
-#define LEAF_REGISTERS \
-{ 1, 1, 1, 1, 1, 1, 1, 0, \
- 0, 0, 0, 0, 0, 0, 1, 0, \
- 0, 0, 0, 0, 0, 0, 0, 0, \
- 1, 1, 1, 1, 1, 1, 0, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1}
+extern char sparc_leaf_regs[];
+#define LEAF_REGISTERS sparc_leaf_regs
extern char leaf_reg_remap[];
#define LEAF_REG_REMAP(REGNO) (leaf_reg_remap[REGNO])
#define STRICT_ARGUMENT_NAMING TARGET_V9
+/* We do not allow sibling calls if -mflat, nor
+ do we allow indirect calls to be optimized into sibling calls. */
+#define FUNCTION_OK_FOR_SIBCALL(DECL) (DECL && ! TARGET_FLAT)
+
/* Generate RTL to flush the register windows so as to make arbitrary frames
available. */
#define SETUP_FRAME_ADDRESSES() \
;; type "call_no_delay_slot" is a call followed by an unimp instruction.
(define_attr "type"
- "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,return,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrts,fpsqrtd,cmove,multi,misc"
+ "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,sibcall,call_no_delay_slot,return,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrts,fpsqrtd,cmove,multi,misc"
(const_string "binary"))
;; Set true if insn uses call-clobbered intermediate register.
;; Attributes for instruction and branch scheduling
(define_attr "in_call_delay" "false,true"
- (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,return,multi")
+ (cond [(eq_attr "type" "uncond_branch,branch,call,sibcall,call_no_delay_slot,return,multi")
(const_string "false")
(eq_attr "type" "load,fpload,store,fpstore")
(if_then_else (eq_attr "length" "1")
(define_delay (eq_attr "type" "call")
[(eq_attr "in_call_delay" "true") (nil) (nil)])
+(define_attr "eligible_for_sibcall_delay" "false,true"
+ (symbol_ref "eligible_for_sibcall_delay(insn)"))
+
+(define_delay (eq_attr "type" "sibcall")
+ [(eq_attr "eligible_for_sibcall_delay" "true") (nil) (nil)])
+
(define_attr "leaf_function" "false,true"
(const (symbol_ref "current_function_uses_only_leaf_regs")))
;; because it prevents us from moving back the final store of inner loops.
(define_attr "in_branch_delay" "false,true"
- (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,call_no_delay_slot,multi")
+ (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
(eq_attr "length" "1"))
(const_string "true")
(const_string "false")))
(define_attr "in_uncond_branch_delay" "false,true"
- (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,call_no_delay_slot,multi")
+ (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
(eq_attr "length" "1"))
(const_string "true")
(const_string "false")))
(define_attr "in_annul_branch_delay" "false,true"
- (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,call_no_delay_slot,multi")
+ (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
(eq_attr "length" "1"))
(const_string "true")
(const_string "false")))
(define_function_unit "ieuN" 2 0
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "ialu,binary,move,unary,shift,compare,call,call_no_delay_slot,uncond_branch"))
+ (eq_attr "type" "ialu,binary,move,unary,shift,compare,call,sibcall,call_no_delay_slot,uncond_branch"))
1 1)
(define_function_unit "ieu0" 1 0
(define_function_unit "ieu1" 1 0
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "compare,call,call_no_delay_slot,uncond_branch"))
+ (eq_attr "type" "compare,call,sibcall,call_no_delay_slot,uncond_branch"))
1 1)
(define_function_unit "cti" 1 0
DONE;
}")
+;;- tail calls
+;; The (return) alongside the call in these patterns is what marks them
+;; as sibling calls. The call's second operand (the argument size) is
+;; not used by the output routine, so the expands pass (const_int 0)
+;; and the insns match it loosely.
+(define_expand "sibcall"
+ [(parallel [(call (match_operand 0 "call_operand" "") (const_int 0))
+ (return)])]
+ ""
+ "")
+
+;; Only symbolic (direct) call targets are matched; indirect sibling
+;; calls are rejected by FUNCTION_OK_FOR_SIBCALL in sparc.h.
+(define_insn "*sibcall_symbolic_sp32"
+ [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "s"))
+ (match_operand 1 "" ""))
+ (return)]
+ "! TARGET_PTR64"
+ "* return output_sibcall(insn, operands[0]);"
+ [(set_attr "type" "sibcall")])
+
+(define_insn "*sibcall_symbolic_sp64"
+ [(call (mem:SI (match_operand:DI 0 "symbolic_operand" "s"))
+ (match_operand 1 "" ""))
+ (return)]
+ "TARGET_PTR64"
+ "* return output_sibcall(insn, operands[0]);"
+ [(set_attr "type" "sibcall")])
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "register_operand" "=rf")
+ (call (match_operand:SI 1 "" "") (const_int 0)))
+ (return)])]
+ ""
+ "")
+
+(define_insn "*sibcall_value_symbolic_sp32"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbolic_operand" "s"))
+ (match_operand 2 "" "")))
+ (return)]
+ "! TARGET_PTR64"
+ "* return output_sibcall(insn, operands[1]);"
+ [(set_attr "type" "sibcall")])
+
+(define_insn "*sibcall_value_symbolic_sp64"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "symbolic_operand" "s"))
+ (match_operand 2 "" "")))
+ (return)]
+ "TARGET_PTR64"
+ "* return output_sibcall(insn, operands[1]);"
+ [(set_attr "type" "sibcall")])
+
+;; output_sibcall emits all stack/register-window teardown itself, so
+;; no separate sibcall epilogue RTL is needed.
+(define_expand "sibcall_epilogue"
+ [(const_int 0)]
+ ""
+ "DONE;")
+
;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
;; all of memory. This blocks insns from being moved across this point.
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
- if (GET_CODE (insn) == CALL_INSN)
+ if (GET_CODE (insn) == CALL_INSN
+ && ! SIBLING_CALL_P (insn))
return 0;
if (GET_CODE (insn) == INSN
&& GET_CODE (PATTERN (insn)) == SEQUENCE
}
for (insn = current_function_epilogue_delay_list; insn; insn = XEXP (insn, 1))
{
- if (GET_CODE (XEXP (insn, 0)) == CALL_INSN)
+ if (GET_CODE (XEXP (insn, 0)) == CALL_INSN
+ && ! SIBLING_CALL_P (insn))
return 0;
if (GET_CODE (XEXP (insn, 0)) == INSN
&& GET_CODE (PATTERN (XEXP (insn, 0))) == SEQUENCE
#ifdef LEAF_REGISTERS
-static char permitted_reg_in_leaf_functions[] = LEAF_REGISTERS;
-
/* Return 1 if this function uses only the registers that can be
safely renumbered. */
only_leaf_regs_used ()
{
int i;
+ char *permitted_reg_in_leaf_functions = LEAF_REGISTERS;
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if ((regs_ever_live[i] || global_regs[i])
a leaf function. */
{
char *cheap_regs;
- static char leaf_regs[] = LEAF_REGISTERS;
+ char *leaf_regs = LEAF_REGISTERS;
if (only_leaf_regs_used () && leaf_function_p ())
cheap_regs = leaf_regs;
called function's return value was copied. Otherwise we're returning
some other value. */
+#ifndef OUTGOING_REGNO
+#define OUTGOING_REGNO(N) (N)
+#endif
+
if (SET_DEST (set) == current_function_return_rtx
&& REG_P (SET_DEST (set))
- && REGNO (SET_DEST (set)) == REGNO (hardret)
+ && OUTGOING_REGNO (REGNO (SET_DEST (set))) == REGNO (hardret)
&& SET_SRC (set) == softret)
return insn;
NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
}
-
/* Given a (possibly empty) set of potential sibling or tail recursion call
sites, determine if optimization is possible.
@table @code
@findex LEAF_REGISTERS
@item LEAF_REGISTERS
-A C initializer for a vector, indexed by hard register number, which
+Name of a char vector, indexed by hard register number, which
contains 1 for a register that is allowable in a candidate for leaf
function treatment.