re PR middle-end/42505 (loop canonicalization causes a lot of unnecessary temporary...
authorSandra Loosemore <sandra@codesourcery.com>
Sat, 10 Jul 2010 18:43:29 +0000 (14:43 -0400)
committerSandra Loosemore <sandra@gcc.gnu.org>
Sat, 10 Jul 2010 18:43:29 +0000 (14:43 -0400)
2010-07-10  Sandra Loosemore  <sandra@codesourcery.com>

PR middle-end/42505

gcc/
* tree-inline.c (estimate_num_insns): Refactor builtin complexity
lookup code into....
* builtins.c (is_simple_builtin, is_inexpensive_builtin): ...these
new functions.
* tree.h (is_simple_builtin, is_inexpensive_builtin): Declare.
* cfgloopanal.c (target_clobbered_regs): Define.
(init_set_costs): Initialize target_clobbered_regs.
(estimate_reg_pressure_cost): Add call_p argument.  When true,
adjust the number of available registers to exclude the
call-clobbered registers.
* cfgloop.h (target_clobbered_regs): Declare.
(estimate_reg_pressure_cost): Adjust declaration.
* tree-ssa-loop-ivopts.c (struct ivopts_data): Add body_includes_call.
(ivopts_global_cost_for_size): Pass it to estimate_reg_pressure_cost.
(determine_set_costs): Dump target_clobbered_regs.
(loop_body_includes_call): New function.
(tree_ssa_iv_optimize_loop): Use it to initialize new field.
* loop-invariant.c (gain_for_invariant): Adjust arguments to pass
call_p flag through.
(best_gain_for_invariant): Likewise.
(find_invariants_to_move): Likewise.
(move_single_loop_invariants): Likewise, using already-computed
has_call field.

From-SVN: r162043

gcc/ChangeLog
gcc/builtins.c
gcc/cfgloop.h
gcc/cfgloopanal.c
gcc/loop-invariant.c
gcc/tree-inline.c
gcc/tree-ssa-loop-ivopts.c
gcc/tree.h

index f943597..201f56d 100644 (file)
@@ -1,3 +1,30 @@
+2010-07-10  Sandra Loosemore  <sandra@codesourcery.com>
+
+       PR middle-end/42505
+       * tree-inline.c (estimate_num_insns): Refactor builtin complexity
+       lookup code into....
+       * builtins.c (is_simple_builtin, is_inexpensive_builtin): ...these
+       new functions.
+       * tree.h (is_simple_builtin, is_inexpensive_builtin): Declare.
+       * cfgloopanal.c (target_clobbered_regs): Define.
+       (init_set_costs): Initialize target_clobbered_regs.
+       (estimate_reg_pressure_cost): Add call_p argument.  When true,
+       adjust the number of available registers to exclude the
+       call-clobbered registers.
+       * cfgloop.h (target_clobbered_regs): Declare.
+       (estimate_reg_pressure_cost): Adjust declaration.
+       * tree-ssa-loop-ivopts.c (struct ivopts_data): Add body_includes_call.
+       (ivopts_global_cost_for_size): Pass it to estimate_reg_pressure_cost.
+       (determine_set_costs): Dump target_clobbered_regs.
+       (loop_body_includes_call): New function.
+       (tree_ssa_iv_optimize_loop): Use it to initialize new field.
+       * loop-invariant.c (gain_for_invariant): Adjust arguments to pass
+       call_p flag through.
+       (best_gain_for_invariant): Likewise.
+       (find_invariants_to_move): Likewise.
+       (move_single_loop_invariants): Likewise, using already-computed
+       has_call field.
+
 2010-07-10  Richard Guenther  <rguenther@suse.de>
            Joern Rennecke  <joern.rennecke@embecosm.com>
 
index 046593e..8039f9b 100644 (file)
@@ -13753,3 +13753,123 @@ set_builtin_user_assembler_name (tree decl, const char *asmspec)
       break;
     }
 }
+
+/* Return true if DECL is a builtin that expands to a constant or similarly
+   simple code.  */
+bool
+is_simple_builtin (tree decl)
+{
+  if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
+    switch (DECL_FUNCTION_CODE (decl))
+      {
+       /* Builtins that expand to constants.  */
+      case BUILT_IN_CONSTANT_P:
+      case BUILT_IN_EXPECT:
+      case BUILT_IN_OBJECT_SIZE:
+      case BUILT_IN_UNREACHABLE:
+       /* Simple register moves or loads from stack.  */
+      case BUILT_IN_RETURN_ADDRESS:
+      case BUILT_IN_EXTRACT_RETURN_ADDR:
+      case BUILT_IN_FROB_RETURN_ADDR:
+      case BUILT_IN_RETURN:
+      case BUILT_IN_AGGREGATE_INCOMING_ADDRESS:
+      case BUILT_IN_FRAME_ADDRESS:
+      case BUILT_IN_VA_END:
+      case BUILT_IN_STACK_SAVE:
+      case BUILT_IN_STACK_RESTORE:
+       /* Exception state returns or moves registers around.  */
+      case BUILT_IN_EH_FILTER:
+      case BUILT_IN_EH_POINTER:
+      case BUILT_IN_EH_COPY_VALUES:
+       return true;
+
+      default:
+       return false;
+      }
+
+  return false;
+}
+
+/* Return true if DECL is a builtin that is not expensive, i.e., they are
+   most probably expanded inline into reasonably simple code.  This is a
+   superset of is_simple_builtin.  */
+bool
+is_inexpensive_builtin (tree decl)
+{
+  if (!decl)
+    return false;
+  else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD)
+    return true;
+  else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
+    switch (DECL_FUNCTION_CODE (decl))
+      {
+      case BUILT_IN_ABS:
+      case BUILT_IN_ALLOCA:
+      case BUILT_IN_BSWAP32:
+      case BUILT_IN_BSWAP64:
+      case BUILT_IN_CLZ:
+      case BUILT_IN_CLZIMAX:
+      case BUILT_IN_CLZL:
+      case BUILT_IN_CLZLL:
+      case BUILT_IN_CTZ:
+      case BUILT_IN_CTZIMAX:
+      case BUILT_IN_CTZL:
+      case BUILT_IN_CTZLL:
+      case BUILT_IN_FFS:
+      case BUILT_IN_FFSIMAX:
+      case BUILT_IN_FFSL:
+      case BUILT_IN_FFSLL:
+      case BUILT_IN_IMAXABS:
+      case BUILT_IN_FINITE:
+      case BUILT_IN_FINITEF:
+      case BUILT_IN_FINITEL:
+      case BUILT_IN_FINITED32:
+      case BUILT_IN_FINITED64:
+      case BUILT_IN_FINITED128:
+      case BUILT_IN_FPCLASSIFY:
+      case BUILT_IN_ISFINITE:
+      case BUILT_IN_ISINF_SIGN:
+      case BUILT_IN_ISINF:
+      case BUILT_IN_ISINFF:
+      case BUILT_IN_ISINFL:
+      case BUILT_IN_ISINFD32:
+      case BUILT_IN_ISINFD64:
+      case BUILT_IN_ISINFD128:
+      case BUILT_IN_ISNAN:
+      case BUILT_IN_ISNANF:
+      case BUILT_IN_ISNANL:
+      case BUILT_IN_ISNAND32:
+      case BUILT_IN_ISNAND64:
+      case BUILT_IN_ISNAND128:
+      case BUILT_IN_ISNORMAL:
+      case BUILT_IN_ISGREATER:
+      case BUILT_IN_ISGREATEREQUAL:
+      case BUILT_IN_ISLESS:
+      case BUILT_IN_ISLESSEQUAL:
+      case BUILT_IN_ISLESSGREATER:
+      case BUILT_IN_ISUNORDERED:
+      case BUILT_IN_VA_ARG_PACK:
+      case BUILT_IN_VA_ARG_PACK_LEN:
+      case BUILT_IN_VA_COPY:
+      case BUILT_IN_TRAP:
+      case BUILT_IN_SAVEREGS:
+      case BUILT_IN_POPCOUNTL:
+      case BUILT_IN_POPCOUNTLL:
+      case BUILT_IN_POPCOUNTIMAX:
+      case BUILT_IN_POPCOUNT:
+      case BUILT_IN_PARITYL:
+      case BUILT_IN_PARITYLL:
+      case BUILT_IN_PARITYIMAX:
+      case BUILT_IN_PARITY:
+      case BUILT_IN_LABS:
+      case BUILT_IN_LLABS:
+      case BUILT_IN_PREFETCH:
+       return true;
+
+      default:
+       return is_simple_builtin (decl);
+      }
+
+  return false;
+}
+
index 3821ee6..46cda11 100644 (file)
@@ -627,13 +627,14 @@ fel_init (loop_iterator *li, loop_p *loop, unsigned flags)
 /* The properties of the target.  */
 
 extern unsigned target_avail_regs;
+extern unsigned target_clobbered_regs;
 extern unsigned target_res_regs;
 extern unsigned target_reg_cost [2];
 extern unsigned target_spill_cost [2];
 
 /* Register pressure estimation for induction variable optimizations & loop
    invariant motion.  */
-extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool);
+extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool, bool);
 extern void init_set_costs (void);
 
 /* Loop optimizer initialization.  */
index 129ec25..853e6f2 100644 (file)
@@ -320,6 +320,8 @@ seq_cost (const_rtx seq, bool speed)
 /* The properties of the target.  */
 
 unsigned target_avail_regs;    /* Number of available registers.  */
+unsigned target_clobbered_regs; /* Number of available registers that are
+                                  call-clobbered.  */
 unsigned target_res_regs;      /* Number of registers reserved for temporary
                                   expressions.  */
 unsigned target_reg_cost[2];   /* The cost for register when there still
@@ -342,10 +344,15 @@ init_set_costs (void)
   unsigned i;
 
   target_avail_regs = 0;
+  target_clobbered_regs = 0;
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
        && !fixed_regs[i])
-      target_avail_regs++;
+      {
+       target_avail_regs++;
+       if (call_used_regs[i])
+         target_clobbered_regs++;
+      }
 
   target_res_regs = 3;
 
@@ -379,20 +386,29 @@ init_set_costs (void)
 
 /* Estimates cost of increased register pressure caused by making N_NEW new
    registers live around the loop.  N_OLD is the number of registers live
-   around the loop.  */
+   around the loop.  If CALL_P is true, also take into account that
+   call-used registers may be clobbered in the loop body, reducing the
+   number of available registers before we spill.  */
 
 unsigned
-estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
+estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
+                           bool call_p)
 {
   unsigned cost;
   unsigned regs_needed = n_new + n_old;
+  unsigned available_regs = target_avail_regs;
+
+  /* If there is a call in the loop body, the call-clobbered registers
+     are not available for loop invariants.  */
+  if (call_p)
+    available_regs = available_regs - target_clobbered_regs;
 
   /* If we have enough registers, we should use them and not restrict
      the transformations unnecessarily.  */
-  if (regs_needed + target_res_regs <= target_avail_regs)
+  if (regs_needed + target_res_regs <= available_regs)
     return 0;
 
-  if (regs_needed <= target_avail_regs)
+  if (regs_needed <= available_regs)
     /* If we are close to running out of registers, try to preserve
        them.  */
     cost = target_reg_cost [speed] * n_new;
index 2f38123..19da7dd 100644 (file)
@@ -1173,11 +1173,13 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed)
 /* Calculates gain for eliminating invariant INV.  REGS_USED is the number
    of registers used in the loop, NEW_REGS is the number of new variables
    already added due to the invariant motion.  The number of registers needed
-   for it is stored in *REGS_NEEDED.  */
+   for it is stored in *REGS_NEEDED.  SPEED and CALL_P are flags passed
+   through to estimate_reg_pressure_cost. */
 
 static int
 gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
-                   unsigned *new_regs, unsigned regs_used, bool speed)
+                   unsigned *new_regs, unsigned regs_used,
+                   bool speed, bool call_p)
 {
   int comp_cost, size_cost;
 
@@ -1188,9 +1190,9 @@ gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
   if (! flag_ira_loop_pressure)
     {
       size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0],
-                                              regs_used, speed)
+                                              regs_used, speed, call_p)
                   - estimate_reg_pressure_cost (new_regs[0],
-                                                regs_used, speed));
+                                                regs_used, speed, call_p));
     }
   else
     {
@@ -1245,7 +1247,8 @@ gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
 
 static int
 best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
-                        unsigned *new_regs, unsigned regs_used, bool speed)
+                        unsigned *new_regs, unsigned regs_used,
+                        bool speed, bool call_p)
 {
   struct invariant *inv;
   int i, gain = 0, again;
@@ -1261,7 +1264,7 @@ best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
        continue;
 
       again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used,
-                                 speed);
+                                 speed, call_p);
       if (again > gain)
        {
          gain = again;
@@ -1314,7 +1317,7 @@ set_move_mark (unsigned invno, int gain)
 /* Determines which invariants to move.  */
 
 static void
-find_invariants_to_move (bool speed)
+find_invariants_to_move (bool speed, bool call_p)
 {
   int gain;
   unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES];
@@ -1353,7 +1356,8 @@ find_invariants_to_move (bool speed)
        new_regs[ira_reg_class_cover[i]] = 0;
     }
   while ((gain = best_gain_for_invariant (&inv, regs_needed,
-                                         new_regs, regs_used, speed)) > 0)
+                                         new_regs, regs_used,
+                                         speed, call_p)) > 0)
     {
       set_move_mark (inv->invno, gain);
       if (! flag_ira_loop_pressure)
@@ -1573,7 +1577,8 @@ move_single_loop_invariants (struct loop *loop)
   init_inv_motion_data ();
 
   find_invariants (loop);
-  find_invariants_to_move (optimize_loop_for_speed_p (loop));
+  find_invariants_to_move (optimize_loop_for_speed_p (loop),
+                          LOOP_DATA (loop)->has_call);
   move_invariants (loop);
 
   free_inv_motion_data ();
index 57364e4..4298958 100644 (file)
@@ -3478,105 +3478,13 @@ estimate_num_insns (gimple stmt, eni_weights *weights)
        if (POINTER_TYPE_P (funtype))
          funtype = TREE_TYPE (funtype);
 
-       if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD)
+       if (is_simple_builtin (decl))
+         return 0;
+       else if (is_inexpensive_builtin (decl))
          cost = weights->target_builtin_call_cost;
        else
          cost = weights->call_cost;
 
-       if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
-         switch (DECL_FUNCTION_CODE (decl))
-           {
-           /* Builtins that expand to constants.  */
-           case BUILT_IN_CONSTANT_P:
-           case BUILT_IN_EXPECT:
-           case BUILT_IN_OBJECT_SIZE:
-           case BUILT_IN_UNREACHABLE:
-           /* Simple register moves or loads from stack.  */
-           case BUILT_IN_RETURN_ADDRESS:
-           case BUILT_IN_EXTRACT_RETURN_ADDR:
-           case BUILT_IN_FROB_RETURN_ADDR:
-           case BUILT_IN_RETURN:
-           case BUILT_IN_AGGREGATE_INCOMING_ADDRESS:
-           case BUILT_IN_FRAME_ADDRESS:
-           case BUILT_IN_VA_END:
-           case BUILT_IN_STACK_SAVE:
-           case BUILT_IN_STACK_RESTORE:
-           /* Exception state returns or moves registers around.  */
-           case BUILT_IN_EH_FILTER:
-           case BUILT_IN_EH_POINTER:
-           case BUILT_IN_EH_COPY_VALUES:
-             return 0;
-
-           /* builtins that are not expensive (that is they are most probably
-              expanded inline into resonably simple code).  */
-           case BUILT_IN_ABS:
-           case BUILT_IN_ALLOCA:
-           case BUILT_IN_BSWAP32:
-           case BUILT_IN_BSWAP64:
-           case BUILT_IN_CLZ:
-           case BUILT_IN_CLZIMAX:
-           case BUILT_IN_CLZL:
-           case BUILT_IN_CLZLL:
-           case BUILT_IN_CTZ:
-           case BUILT_IN_CTZIMAX:
-           case BUILT_IN_CTZL:
-           case BUILT_IN_CTZLL:
-           case BUILT_IN_FFS:
-           case BUILT_IN_FFSIMAX:
-           case BUILT_IN_FFSL:
-           case BUILT_IN_FFSLL:
-           case BUILT_IN_IMAXABS:
-           case BUILT_IN_FINITE:
-           case BUILT_IN_FINITEF:
-           case BUILT_IN_FINITEL:
-           case BUILT_IN_FINITED32:
-           case BUILT_IN_FINITED64:
-           case BUILT_IN_FINITED128:
-           case BUILT_IN_FPCLASSIFY:
-           case BUILT_IN_ISFINITE:
-           case BUILT_IN_ISINF_SIGN:
-           case BUILT_IN_ISINF:
-           case BUILT_IN_ISINFF:
-           case BUILT_IN_ISINFL:
-           case BUILT_IN_ISINFD32:
-           case BUILT_IN_ISINFD64:
-           case BUILT_IN_ISINFD128:
-           case BUILT_IN_ISNAN:
-           case BUILT_IN_ISNANF:
-           case BUILT_IN_ISNANL:
-           case BUILT_IN_ISNAND32:
-           case BUILT_IN_ISNAND64:
-           case BUILT_IN_ISNAND128:
-           case BUILT_IN_ISNORMAL:
-           case BUILT_IN_ISGREATER:
-           case BUILT_IN_ISGREATEREQUAL:
-           case BUILT_IN_ISLESS:
-           case BUILT_IN_ISLESSEQUAL:
-           case BUILT_IN_ISLESSGREATER:
-           case BUILT_IN_ISUNORDERED:
-           case BUILT_IN_VA_ARG_PACK:
-           case BUILT_IN_VA_ARG_PACK_LEN:
-           case BUILT_IN_VA_COPY:
-           case BUILT_IN_TRAP:
-           case BUILT_IN_SAVEREGS:
-           case BUILT_IN_POPCOUNTL:
-           case BUILT_IN_POPCOUNTLL:
-           case BUILT_IN_POPCOUNTIMAX:
-           case BUILT_IN_POPCOUNT:
-           case BUILT_IN_PARITYL:
-           case BUILT_IN_PARITYLL:
-           case BUILT_IN_PARITYIMAX:
-           case BUILT_IN_PARITY:
-           case BUILT_IN_LABS:
-           case BUILT_IN_LLABS:
-           case BUILT_IN_PREFETCH:
-             cost = weights->target_builtin_call_cost;
-             break;
-
-           default:
-             break;
-           }
-
        if (decl)
          funtype = TREE_TYPE (decl);
 
index c0a2194..83ec13e 100644 (file)
@@ -260,6 +260,9 @@ struct ivopts_data
 
   /* Are we optimizing for speed?  */
   bool speed;
+
+  /* Whether the loop body includes any function calls.  */
+  bool body_includes_call;
 };
 
 /* An assignment of iv candidates to uses.  */
@@ -4431,7 +4434,8 @@ ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
 {
   /* We add size to the cost, so that we prefer eliminating ivs
      if possible.  */
-  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
+  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
+                                           data->body_includes_call);
 }
 
 /* For each size of the induction variable set determine the penalty.  */
@@ -4450,6 +4454,7 @@ determine_set_costs (struct ivopts_data *data)
     {
       fprintf (dump_file, "Global costs:\n");
       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
+      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
     }
@@ -5859,6 +5864,25 @@ tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
   VEC_free (iv_cand_p, heap, data->iv_candidates);
 }
 
+/* Returns true if the loop body BODY includes any function calls.  */
+
+static bool
+loop_body_includes_call (basic_block *body, unsigned num_nodes)
+{
+  gimple_stmt_iterator gsi;
+  unsigned i;
+
+  for (i = 0; i < num_nodes; i++)
+    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+       gimple stmt = gsi_stmt (gsi);
+       if (is_gimple_call (stmt)
+           && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
+         return true;
+      }
+  return false;
+}
+
 /* Optimizes the LOOP.  Returns true if anything changed.  */
 
 static bool
@@ -5890,6 +5914,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
     }
 
   body = get_loop_body (loop);
+  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
   free (body);
 
index e749d69..960ee7d 100644 (file)
@@ -5046,6 +5046,8 @@ extern tree build_range_check (location_t, tree, tree, int, tree, tree);
 extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
                          tree, tree);
 extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
+extern bool is_simple_builtin (tree);
+extern bool is_inexpensive_builtin (tree);
 
 /* In convert.c */
 extern tree strip_float_extensions (tree);