1 /* Tail calls optimization on trees.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
23 #include "coretypes.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "diagnostic.h"
35 #include "tree-pass.h"
37 #include "langhooks.h"
39 /* The file implements the tail recursion elimination. It is also used to
40 analyse the tail calls in general, passing the results to the rtl level
41 where they are used for sibcall optimization.
43 In addition to the standard tail recursion elimination, we handle the most
44 trivial cases of making the call tail recursive by creating accumulators.
45 For example the following function
50 return n + sum (n - 1);
67 To do this, we maintain two accumulators (a_acc and m_acc) that indicate
68 when we reach the return x statement, we should return a_acc + x * m_acc
69 instead. They are initially initialized to 0 and 1, respectively,
70 so the semantics of the function is obviously preserved. If we are
71 guaranteed that the value of the accumulator never change, we
74 There are three cases how the function may exit. The first one is
75 handled in adjust_return_value, the later two in adjust_accumulator_values
76 (the second case is actually a special case of the third one and we
77 present it separately just for clarity):
79 1) Just return x, where x is not in any of the remaining special shapes.
80 We rewrite this to a gimple equivalent of return m_acc * x + a_acc.
82 2) return f (...), where f is the current function, is rewritten in a
83 clasical tail-recursion elimination way, into assignment of arguments
84 and jump to the start of the function. Values of the accumulators
87 3) return a + m * f(...), where a and m do not depend on call to f.
88 To preserve the semantics described before we want this to be rewritten
89 in such a way that we finally return
91 a_acc + (a + m * f(...)) * m_acc = (a_acc + a * m_acc) + (m * m_acc) * f(...).
93 I.e. we increase a_acc by a * m_acc, multiply m_acc by m and
94 eliminate the tail call to f. Special cases when the value is just
95 added or just multiplied are obtained by setting a = 0 or m = 1.
97 TODO -- it is possible to do similar tricks for other operations. */
99 /* A structure that describes the tailcall. */
103 /* The block in that the call occur. */
104 basic_block call_block;
106 /* The iterator pointing to the call statement. */
107 block_stmt_iterator call_bsi;
109 /* True if it is a call to the current function. */
112 /* The return value of the caller is mult * f + add, where f is the return
113 value of the call. */
116 /* Next tailcall in the chain. */
117 struct tailcall *next;
120 /* The variables holding the value of multiplicative and additive
122 static tree m_acc, a_acc;
124 static bool suitable_for_tail_opt_p (void);
125 static bool optimize_tail_call (struct tailcall *, bool);
126 static void eliminate_tail_call (struct tailcall *);
127 static void find_tail_calls (basic_block, struct tailcall **);
129 /* Returns false when the function is not suitable for tail call optimization
130 from some reason (e.g. if it takes variable number of arguments). */
133 suitable_for_tail_opt_p (void)
137 if (current_function_stdarg)
140 /* No local variable should be call-clobbered. We ignore any kind
141 of memory tag, as these are not real variables. */
142 for (i = 0; i < (int) VARRAY_ACTIVE_SIZE (referenced_vars); i++)
144 tree var = VARRAY_TREE (referenced_vars, i);
146 if (decl_function_context (var) == current_function_decl
147 && !TREE_STATIC (var)
148 && var_ann (var)->mem_tag_kind == NOT_A_TAG
149 && is_call_clobbered (var))
155 /* Returns false when the function is not suitable for tail call optimization
156 from some reason (e.g. if it takes variable number of arguments).
157 This test must pass in addition to suitable_for_tail_opt_p in order to make
158 tail call discovery happen. */
161 suitable_for_tail_call_opt_p (void)
163 /* alloca (until we have stack slot life analysis) inhibits
164 sibling call optimizations, but not tail recursion. */
165 if (current_function_calls_alloca)
168 /* If we are using sjlj exceptions, we may need to add a call to
169 _Unwind_SjLj_Unregister at exit of the function. Which means
170 that we cannot do any sibcall transformations. */
171 if (USING_SJLJ_EXCEPTIONS && current_function_has_exception_handlers ())
174 /* Any function that calls setjmp might have longjmp called from
175 any called function. ??? We really should represent this
176 properly in the CFG so that this needn't be special cased. */
177 if (current_function_calls_setjmp)
183 /* Checks whether the expression EXPR in stmt AT is independent of the
184 statement pointed by BSI (in a sense that we already know EXPR's value
185 at BSI). We use the fact that we are only called from the chain of
186 basic blocks that have only single successor. Returns the expression
187 containing the value of EXPR at BSI. */
190 independent_of_stmt_p (tree expr, tree at, block_stmt_iterator bsi)
192 basic_block bb, call_bb, at_bb;
195 if (is_gimple_min_invariant (expr))
198 if (TREE_CODE (expr) != SSA_NAME)
201 /* Mark the blocks in the chain leading to the end. */
202 at_bb = bb_for_stmt (at);
203 call_bb = bb_for_stmt (bsi_stmt (bsi));
204 for (bb = call_bb; bb != at_bb; bb = bb->succ->dest)
210 at = SSA_NAME_DEF_STMT (expr);
211 bb = bb_for_stmt (at);
213 /* The default defininition or defined before the chain. */
219 for (; !bsi_end_p (bsi); bsi_next (&bsi))
220 if (bsi_stmt (bsi) == at)
223 if (!bsi_end_p (bsi))
228 if (TREE_CODE (at) != PHI_NODE)
234 for (e = bb->pred; e; e = e->pred_next)
240 expr = phi_element_for_edge (at, e)->def;
243 /* Unmark the blocks. */
244 for (bb = call_bb; bb != at_bb; bb = bb->succ->dest)
251 /* Simulates the effect of an assignment of ASS in STMT on the return value
252 of the tail recursive CALL passed in ASS_VAR. M and A are the
253 multiplicative and the additive factor for the real return value. */
256 process_assignment (tree ass, tree stmt, block_stmt_iterator call, tree *m,
257 tree *a, tree *ass_var)
259 tree op0, op1, non_ass_var;
260 tree dest = TREE_OPERAND (ass, 0);
261 tree src = TREE_OPERAND (ass, 1);
262 enum tree_code code = TREE_CODE (src);
264 if (code == SSA_NAME)
273 if (TREE_CODE_CLASS (code) != '2')
276 /* We only handle the code like
283 TODO -- Extend it for cases where the linear transformation of the output
284 is expressed in a more complicated way. */
286 op0 = TREE_OPERAND (src, 0);
287 op1 = TREE_OPERAND (src, 1);
290 && (non_ass_var = independent_of_stmt_p (op1, stmt, call)))
292 else if (op1 == *ass_var
293 && (non_ass_var = independent_of_stmt_p (op0, stmt, call)))
298 switch (TREE_CODE (src))
301 /* There should be no previous addition. TODO -- it should be fairly
302 straightforward to lift this restriction -- just allow storing
303 more complicated expressions in *A, and gimplify it in
304 adjust_accumulator_values. */
312 /* Similar remark applies here. Handling multiplication after addition
313 is just slightly more complicated -- we need to multiply both *A and
321 /* TODO -- Handle other codes (NEGATE_EXPR, MINUS_EXPR). */
328 /* Propagate VAR through phis on edge E. */
331 propagate_through_phis (tree var, edge e)
333 basic_block dest = e->dest;
336 for (phi = phi_nodes (dest); phi; phi = TREE_CHAIN (phi))
337 if (phi_element_for_edge (phi, e)->def == var)
338 return PHI_RESULT (phi);
343 /* Finds tailcalls falling into basic block BB. The list of found tailcalls is
344 added to the start of RET. */
347 find_tail_calls (basic_block bb, struct tailcall **ret)
349 tree ass_var, ret_var, stmt, func, param, args, call = NULL_TREE;
350 block_stmt_iterator bsi, absi;
358 if (bb->succ->succ_next)
361 for (bsi = bsi_last (bb); !bsi_end_p (bsi); bsi_prev (&bsi))
363 stmt = bsi_stmt (bsi);
366 if (TREE_CODE (stmt) == LABEL_EXPR)
369 get_stmt_operands (stmt);
371 /* Check for a call. */
372 if (TREE_CODE (stmt) == MODIFY_EXPR)
374 ass_var = TREE_OPERAND (stmt, 0);
375 call = TREE_OPERAND (stmt, 1);
383 if (TREE_CODE (call) == CALL_EXPR)
386 /* If the statement has virtual operands, fail. */
387 ann = stmt_ann (stmt);
388 if (NUM_VDEFS (VDEF_OPS (ann))
389 || NUM_VUSES (VUSE_OPS (ann)))
395 /* Recurse to the predecessors. */
396 for (e = bb->pred; e; e = e->pred_next)
397 find_tail_calls (e->src, ret);
402 /* We found the call, check whether it is suitable. */
403 tail_recursion = false;
404 func = get_callee_fndecl (call);
405 if (func == current_function_decl)
407 for (param = DECL_ARGUMENTS (func), args = TREE_OPERAND (call, 1);
409 param = TREE_CHAIN (param), args = TREE_CHAIN (args))
411 tree arg = TREE_VALUE (args);
413 /* Make sure there are no problems with copying. Note we must
414 have a copyable type and the two arguments must have reasonably
415 equivalent types. The latter requirement could be relaxed if
416 we emitted a suitable type conversion statement. */
417 && (!is_gimple_reg_type (TREE_TYPE (param))
418 || !lang_hooks.types_compatible_p (TREE_TYPE (param),
423 tail_recursion = true;
426 /* Now check the statements after the call. None of them has virtual
427 operands, so they may only depend on the call through its return
428 value. The return value should also be dependent on each of them,
429 since we are running after dce. */
439 while (bsi_end_p (absi))
441 ass_var = propagate_through_phis (ass_var, abb->succ);
442 abb = abb->succ->dest;
443 absi = bsi_start (abb);
446 stmt = bsi_stmt (absi);
448 if (TREE_CODE (stmt) == LABEL_EXPR)
451 if (TREE_CODE (stmt) == RETURN_EXPR)
454 if (TREE_CODE (stmt) != MODIFY_EXPR)
457 /* Unless this is a tail recursive call, we cannot do anything with
458 the statement anyway. */
462 if (!process_assignment (stmt, stmt, bsi, &m, &a, &ass_var))
466 ret_var = TREE_OPERAND (stmt, 0);
468 && TREE_CODE (ret_var) == MODIFY_EXPR)
471 && TREE_CODE (TREE_OPERAND (ret_var, 1)) != SSA_NAME)
474 if (!process_assignment (ret_var, stmt, bsi, &m, &a, &ass_var))
476 ret_var = TREE_OPERAND (ret_var, 0);
479 /* We may proceed if there either is no return value, or the return value
480 is identical to the call's return. */
482 && (ret_var != ass_var))
485 nw = xmalloc (sizeof (struct tailcall));
490 nw->tail_recursion = tail_recursion;
499 /* Adjust the accumulator values according to A and M after BSI, and update
500 the phi nodes on edge BACK. */
503 adjust_accumulator_values (block_stmt_iterator bsi, tree m, tree a, edge back)
505 tree stmt, var, phi, tmp;
506 tree ret_type = TREE_TYPE (DECL_RESULT (current_function_decl));
507 tree a_acc_arg = a_acc, m_acc_arg = m_acc;
513 if (integer_onep (a))
517 stmt = build (MODIFY_EXPR, ret_type, NULL_TREE,
518 build (MULT_EXPR, ret_type, m_acc, a));
520 tmp = create_tmp_var (ret_type, "acc_tmp");
521 add_referenced_tmp_var (tmp);
523 var = make_ssa_name (tmp, stmt);
524 TREE_OPERAND (stmt, 0) = var;
525 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
531 stmt = build (MODIFY_EXPR, ret_type, NULL_TREE,
532 build (PLUS_EXPR, ret_type, a_acc, var));
533 var = make_ssa_name (SSA_NAME_VAR (a_acc), stmt);
534 TREE_OPERAND (stmt, 0) = var;
535 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
541 stmt = build (MODIFY_EXPR, ret_type, NULL_TREE,
542 build (MULT_EXPR, ret_type, m_acc, m));
543 var = make_ssa_name (SSA_NAME_VAR (m_acc), stmt);
544 TREE_OPERAND (stmt, 0) = var;
545 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
551 for (phi = phi_nodes (back->dest); phi; phi = TREE_CHAIN (phi))
552 if (PHI_RESULT (phi) == a_acc)
555 add_phi_arg (&phi, a_acc_arg, back);
560 for (phi = phi_nodes (back->dest); phi; phi = TREE_CHAIN (phi))
561 if (PHI_RESULT (phi) == m_acc)
564 add_phi_arg (&phi, m_acc_arg, back);
568 /* Adjust value of the return at the end of BB accodring to M and A
572 adjust_return_value (basic_block bb, tree m, tree a)
574 tree ret_stmt = last_stmt (bb), ret_var, var, stmt, tmp;
575 tree ret_type = TREE_TYPE (DECL_RESULT (current_function_decl));
576 block_stmt_iterator bsi = bsi_last (bb);
578 if (TREE_CODE (ret_stmt) != RETURN_EXPR)
581 ret_var = TREE_OPERAND (ret_stmt, 0);
585 if (TREE_CODE (ret_var) == MODIFY_EXPR)
587 ret_var->common.ann = (tree_ann) stmt_ann (ret_stmt);
588 bsi_replace (&bsi, ret_var, true);
589 SSA_NAME_DEF_STMT (TREE_OPERAND (ret_var, 0)) = ret_var;
590 ret_var = TREE_OPERAND (ret_var, 0);
591 ret_stmt = build1 (RETURN_EXPR, TREE_TYPE (ret_stmt), ret_var);
592 bsi_insert_after (&bsi, ret_stmt, BSI_NEW_STMT);
597 stmt = build (MODIFY_EXPR, ret_type, NULL_TREE,
598 build (MULT_EXPR, ret_type, m_acc, ret_var));
600 tmp = create_tmp_var (ret_type, "acc_tmp");
601 add_referenced_tmp_var (tmp);
603 var = make_ssa_name (tmp, stmt);
604 TREE_OPERAND (stmt, 0) = var;
605 bsi_insert_before (&bsi, stmt, BSI_NEW_STMT);
612 stmt = build (MODIFY_EXPR, ret_type, NULL_TREE,
613 build (PLUS_EXPR, ret_type, a_acc, var));
615 tmp = create_tmp_var (ret_type, "acc_tmp");
616 add_referenced_tmp_var (tmp);
618 var = make_ssa_name (tmp, stmt);
619 TREE_OPERAND (stmt, 0) = var;
620 bsi_insert_before (&bsi, stmt, BSI_NEW_STMT);
623 TREE_OPERAND (ret_stmt, 0) = var;
624 modify_stmt (ret_stmt);
627 /* Eliminates tail call described by T. TMP_VARS is a list of
628 temporary variables used to copy the function arguments. */
631 eliminate_tail_call (struct tailcall *t)
633 tree param, stmt, args, rslt, call;
634 basic_block bb, first;
641 stmt = bsi_stmt (t->call_bsi);
642 get_stmt_operands (stmt);
643 ann = stmt_ann (stmt);
646 if (dump_file && (dump_flags & TDF_DETAILS))
648 fprintf (dump_file, "Eliminated tail recursion in bb %d : ",
650 print_generic_stmt (dump_file, stmt, TDF_SLIM);
651 fprintf (dump_file, "\n");
654 if (TREE_CODE (stmt) == MODIFY_EXPR)
655 stmt = TREE_OPERAND (stmt, 1);
657 first = ENTRY_BLOCK_PTR->succ->dest;
659 /* Replace the call by a jump to the start of function. */
660 e = redirect_edge_and_branch (t->call_block->succ, first);
663 PENDING_STMT (e) = NULL_TREE;
665 /* Add phi node entries for arguments. Not every PHI node corresponds to
666 a function argument (there may be PHI nodes for virtual definitions of the
667 eliminated calls), so we search for a PHI corresponding to each argument
668 rather than searching for which argument a PHI node corresponds to. */
670 for (param = DECL_ARGUMENTS (current_function_decl),
671 args = TREE_OPERAND (stmt, 1);
673 param = TREE_CHAIN (param),
674 args = TREE_CHAIN (args))
677 for (phi = phi_nodes (first); phi; phi = TREE_CHAIN (phi))
678 if (param == SSA_NAME_VAR (PHI_RESULT (phi)))
681 /* The phi node indeed does not have to be there, in case the operand is
682 invariant in the function. */
686 add_phi_arg (&phi, TREE_VALUE (args), e);
689 /* Add phi nodes for the call clobbered variables. */
690 vdefs = VDEF_OPS (ann);
691 for (i = 0; i < NUM_VDEFS (vdefs); i++)
693 param = SSA_NAME_VAR (VDEF_RESULT (vdefs, i));
694 for (phi = phi_nodes (first); phi; phi = TREE_CHAIN (phi))
695 if (param == SSA_NAME_VAR (PHI_RESULT (phi)))
700 tree name = var_ann (param)->default_def;
701 tree new_name = make_ssa_name (param, SSA_NAME_DEF_STMT (name));
703 var_ann (param)->default_def = new_name;
704 phi = create_phi_node (name, first);
705 SSA_NAME_DEF_STMT (name) = phi;
706 add_phi_arg (&phi, new_name, ENTRY_BLOCK_PTR->succ);
708 /* For all calls the same set of variables should be clobbered. This
709 means that there always should be the appropriate phi node except
710 for the first time we eliminate the call. */
711 if (first->pred->pred_next->pred_next)
715 add_phi_arg (&phi, VDEF_OP (vdefs, i), e);
718 /* Update the values of accumulators. */
719 adjust_accumulator_values (t->call_bsi, t->mult, t->add, e);
721 call = bsi_stmt (t->call_bsi);
722 if (TREE_CODE (call) == MODIFY_EXPR)
724 rslt = TREE_OPERAND (call, 0);
726 /* Result of the call will no longer be defined. So adjust the
727 SSA_NAME_DEF_STMT accordingly. */
728 SSA_NAME_DEF_STMT (rslt) = build_empty_stmt ();
731 bsi_remove (&t->call_bsi);
734 /* Optimizes the tailcall described by T. If OPT_TAILCALLS is true, also
735 mark the tailcalls for the sibcall optimization. */
738 optimize_tail_call (struct tailcall *t, bool opt_tailcalls)
740 if (t->tail_recursion)
742 eliminate_tail_call (t);
748 tree stmt = bsi_stmt (t->call_bsi);
750 if (TREE_CODE (stmt) == MODIFY_EXPR)
751 stmt = TREE_OPERAND (stmt, 1);
752 if (TREE_CODE (stmt) != CALL_EXPR)
754 CALL_EXPR_TAILCALL (stmt) = 1;
755 if (dump_file && (dump_flags & TDF_DETAILS))
757 fprintf (dump_file, "Found tail call ");
758 print_generic_expr (dump_file, stmt, dump_flags);
759 fprintf (dump_file, " in bb %i\n", t->call_block->index);
766 /* Optimizes tail calls in the function, turning the tail recursion
770 tree_optimize_tail_calls_1 (bool opt_tailcalls)
773 bool phis_constructed = false;
774 struct tailcall *tailcalls = NULL, *act, *next;
775 bool changed = false;
776 basic_block first = ENTRY_BLOCK_PTR->succ->dest;
777 tree stmt, param, ret_type, tmp, phi;
779 if (!suitable_for_tail_opt_p ())
782 opt_tailcalls = suitable_for_tail_call_opt_p ();
784 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
786 /* Only traverse the normal exits, i.e. those that end with return
788 stmt = last_stmt (e->src);
791 && TREE_CODE (stmt) == RETURN_EXPR)
792 find_tail_calls (e->src, &tailcalls);
795 /* Construct the phi nodes and accumulators if necessary. */
796 a_acc = m_acc = NULL_TREE;
797 for (act = tailcalls; act; act = act->next)
799 if (!act->tail_recursion)
802 if (!phis_constructed)
804 /* Ensure that there is only one predecessor of the block. */
805 if (first->pred->pred_next)
806 first = split_edge (ENTRY_BLOCK_PTR->succ);
808 /* Copy the args if needed. */
809 for (param = DECL_ARGUMENTS (current_function_decl);
811 param = TREE_CHAIN (param))
813 /* Also parameters that are only defined but never used need not
815 && (var_ann (param)->default_def
816 && TREE_CODE (var_ann (param)->default_def) == SSA_NAME))
818 tree name = var_ann (param)->default_def;
819 tree new_name = make_ssa_name (param, SSA_NAME_DEF_STMT (name));
822 var_ann (param)->default_def = new_name;
823 phi = create_phi_node (name, first);
824 SSA_NAME_DEF_STMT (name) = phi;
825 add_phi_arg (&phi, new_name, first->pred);
827 phis_constructed = true;
830 if (act->add && !a_acc)
832 ret_type = TREE_TYPE (DECL_RESULT (current_function_decl));
834 tmp = create_tmp_var (ret_type, "add_acc");
835 add_referenced_tmp_var (tmp);
837 phi = create_phi_node (tmp, first);
838 add_phi_arg (&phi, convert (ret_type, integer_zero_node),
840 a_acc = PHI_RESULT (phi);
843 if (act->mult && !m_acc)
845 ret_type = TREE_TYPE (DECL_RESULT (current_function_decl));
847 tmp = create_tmp_var (ret_type, "mult_acc");
848 add_referenced_tmp_var (tmp);
850 phi = create_phi_node (tmp, first);
851 add_phi_arg (&phi, convert (ret_type, integer_one_node),
853 m_acc = PHI_RESULT (phi);
857 for (; tailcalls; tailcalls = next)
859 next = tailcalls->next;
860 changed |= optimize_tail_call (tailcalls, opt_tailcalls);
866 /* Modify the remaining return statements. */
867 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
869 stmt = last_stmt (e->src);
872 && TREE_CODE (stmt) == RETURN_EXPR)
873 adjust_return_value (e->src, m_acc, a_acc);
879 free_dominance_info (CDI_DOMINATORS);
885 execute_tail_recursion (void)
887 tree_optimize_tail_calls_1 (false);
891 gate_tail_calls (void)
893 return flag_optimize_sibling_calls != 0;
897 execute_tail_calls (void)
899 tree_optimize_tail_calls_1 (true);
902 struct tree_opt_pass pass_tail_recursion =
906 execute_tail_recursion, /* execute */
909 0, /* static_pass_number */
911 PROP_cfg | PROP_ssa, /* properties_required */
912 0, /* properties_provided */
913 0, /* properties_destroyed */
914 0, /* todo_flags_start */
915 TODO_dump_func | TODO_verify_ssa /* todo_flags_finish */
918 struct tree_opt_pass pass_tail_calls =
921 gate_tail_calls, /* gate */
922 execute_tail_calls, /* execute */
925 0, /* static_pass_number */
927 PROP_cfg | PROP_ssa, /* properties_required */
928 0, /* properties_provided */
929 0, /* properties_destroyed */
930 0, /* todo_flags_start */
931 TODO_dump_func | TODO_verify_ssa /* todo_flags_finish */