ira: Consider modelling caller-save allocations as loop spills

author Richard Sandiford <richard.sandiford@arm.com>

Mon, 10 Jan 2022 14:47:08 +0000 (14:47 +0000)

committer Richard Sandiford <richard.sandiford@arm.com>

Mon, 10 Jan 2022 14:47:08 +0000 (14:47 +0000)
author Richard Sandiford <richard.sandiford@arm.com>
Mon, 10 Jan 2022 14:47:08 +0000 (14:47 +0000)
committer Richard Sandiford <richard.sandiford@arm.com>
Mon, 10 Jan 2022 14:47:08 +0000 (14:47 +0000)
diff --git a/gcc/ira-build.c b/gcc/ira-build.c

index 875b4d8..ab3e871 100644 (file)
--- a/gcc/ira-build.c
+++ b/gcc/ira-build.c
@@ -2000,6 +2000,8 @@ ira_propagate_hard_reg_costs (ira_allocno_t parent_a, ira_allocno_t a,
                               int spill_cost)
  {
    HARD_REG_SET conflicts = ira_total_conflict_hard_regs (a);
+  if (ira_caller_save_loop_spill_p (parent_a, a, spill_cost))
+    conflicts |= ira_need_caller_save_regs (a);
    conflicts &= ~ira_total_conflict_hard_regs (parent_a);
  
    auto costs = ALLOCNO_HARD_REG_COSTS (a);
@@ -2069,15 +2071,18 @@ propagate_allocno_info (void)
           if (!ira_subloop_allocnos_can_differ_p (parent_a))
             merge_hard_reg_conflicts (a, parent_a, true);
  
-         ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
-         ALLOCNO_CALLS_CROSSED_NUM (parent_a)
-           += ALLOCNO_CALLS_CROSSED_NUM (a);
-         ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a)
-           += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a);
-         ALLOCNO_CROSSED_CALLS_ABIS (parent_a)
-           |= ALLOCNO_CROSSED_CALLS_ABIS (a);
-         ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a)
-           |= ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a);
+         if (!ira_caller_save_loop_spill_p (parent_a, a, spill_cost))
+           {
+             ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
+             ALLOCNO_CALLS_CROSSED_NUM (parent_a)
+               += ALLOCNO_CALLS_CROSSED_NUM (a);
+             ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a)
+               += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a);
+             ALLOCNO_CROSSED_CALLS_ABIS (parent_a)
+               |= ALLOCNO_CROSSED_CALLS_ABIS (a);
+             ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a)
+               |= ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a);
+           }
           ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a)
             += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a);
           aclass = ALLOCNO_CLASS (a);
diff --git a/gcc/ira-color.c b/gcc/ira-color.c

index 4344ee6..1487afc 100644 (file)
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -3597,11 +3597,16 @@ move_spill_restore (void)
                  propagate_allocno_info will have propagated
                  the cost of spilling HARD_REGNO in SUBLOOP_NODE.
                  (ira_subloop_allocnos_can_differ_p must be true
-                in that case.)  Otherwise, SPILL_COST acted as
-                a cap on the propagated register cost, in cases
-                where the allocations can differ.  */
+                in that case.)  If HARD_REGNO is a caller-saved
+                register, we might have modelled it in the same way.
+
+                Otherwise, SPILL_COST acted as a cap on the propagated
+                register cost, in cases where the allocations can differ.  */
               auto conflicts = ira_total_conflict_hard_regs (subloop_allocno);
-             if (TEST_HARD_REG_BIT (conflicts, hard_regno))
+             if (TEST_HARD_REG_BIT (conflicts, hard_regno)
+                 || (ira_need_caller_save_p (subloop_allocno, hard_regno)
+                     && ira_caller_save_loop_spill_p (a, subloop_allocno,
+                                                      spill_cost)))
                 reg_cost = spill_cost;
               else if (ira_subloop_allocnos_can_differ_p (a))
                 reg_cost = MIN (reg_cost, spill_cost);
diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c

index 280befc..cbb58d3 100644 (file)
--- a/gcc/ira-costs.c
+++ b/gcc/ira-costs.c
@@ -2308,7 +2308,7 @@ ira_tune_allocno_costs (void)
  {
    int j, n, regno;
    int cost, min_cost, *reg_costs;
-  enum reg_class aclass, rclass;
+  enum reg_class aclass;
    machine_mode mode;
    ira_allocno_t a;
    ira_allocno_iterator ai;
@@ -2347,12 +2347,9 @@ ira_tune_allocno_costs (void)
                 }
               if (skip_p)
                 continue;
-             rclass = REGNO_REG_CLASS (regno);
               cost = 0;
               if (ira_need_caller_save_p (a, regno))
-               cost += (ALLOCNO_CALL_FREQ (a)
-                        * (ira_memory_move_cost[mode][rclass][0]
-                           + ira_memory_move_cost[mode][rclass][1]));
+               cost += ira_caller_save_cost (a);
  #ifdef IRA_HARD_REGNO_ADD_COST_MULTIPLIER
               cost += ((ira_memory_move_cost[mode][rclass][0]
                         + ira_memory_move_cost[mode][rclass][1])
diff --git a/gcc/ira-int.h b/gcc/ira-int.h

index 8b87498..a78811e 100644 (file)
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -1660,4 +1660,43 @@ ira_total_conflict_hard_regs (ira_allocno_t a)
    return conflicts;
  }
  
+/* Return the cost of saving a caller-saved register before each call
+   in A's live range and restoring the same register after each call.  */
+inline int
+ira_caller_save_cost (ira_allocno_t a)
+{
+  auto mode = ALLOCNO_MODE (a);
+  auto rclass = ALLOCNO_CLASS (a);
+  return (ALLOCNO_CALL_FREQ (a)
+         * (ira_memory_move_cost[mode][rclass][0]
+            + ira_memory_move_cost[mode][rclass][1]));
+}
+
+/* A and SUBLOOP_A are allocnos for the same pseudo register, with A's
+   loop immediately enclosing SUBLOOP_A's loop.  If we allocate to A a
+   hard register R that is clobbered by a call in SUBLOOP_A, decide
+   which of the following approaches should be used for handling the
+   conflict:
+
+   (1) Spill R on entry to SUBLOOP_A's loop, assign memory to SUBLOOP_A,
+       and restore R on exit from SUBLOOP_A's loop.
+
+   (2) Spill R before each necessary call in SUBLOOP_A's live range and
+       restore R after each such call.
+
+   Return true if (1) is better than (2).  SPILL_COST is the cost of
+   doing (1).  */
+inline bool
+ira_caller_save_loop_spill_p (ira_allocno_t a, ira_allocno_t subloop_a,
+                             int spill_cost)
+{
+  if (!ira_subloop_allocnos_can_differ_p (a))
+    return false;
+
+  /* Calculate the cost of saving a call-clobbered register
+     before each call and restoring it afterwards.  */
+  int call_cost = ira_caller_save_cost (subloop_a);
+  return call_cost && call_cost >= spill_cost;
+}
+
  #endif /* GCC_IRA_INT_H */
diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c

new file mode 100644 (file)

index 0000000..7acdc43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c
@@ -0,0 +1,65 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#define PROB 0.1
+
+struct L
+{
+  int data;
+  volatile struct L *next;
+  volatile struct L *inner;
+};
+
+void ext();
+
+/* The thing we're testing here is that the !head->inner path of the outer loop
+   body has no stack accesses.  It's possible that we'll need to update this
+   pattern for unrelated code changes, but the test should be XFAILed rather
+   than changed if any new stack accesses creep into the !head->inner path.  */
+/*
+** foo:
+**     ...
+**     ldr     (w[0-9]+), \[(x[0-9]+)\]
+**     add     (w[0-9]+), (?:\3, \1|\1, \3)
+**     ldr     (x[0-9]+), \[\2, #?16\]
+**     str     \3, \[\2\]
+**     ldr     \2, \[\2, #?8\]
+**     cbn?z   \4, .*
+**     ...
+**     ret
+*/
+void
+foo (volatile struct L *head, int inc, double *ptr)
+{
+  double d = *ptr;
+  while (head)
+    {
+      /* Clobber all call-preserved GPRs, so that the loop has to use
+        call-clobbered GPRs if it is to avoid spilling.  */
+      asm volatile ("" :::
+                   "x19", "x20", "x21", "x22", "x23",
+                   "x24", "x25", "x26", "x27", "x28");
+      inc = head->data + inc;
+      volatile struct L *inner = head->inner;
+      head->data = inc;
+      head = head->next;
+      if (__builtin_expect_with_probability (inner != 0, 0, PROB))
+       for (int i = 0; i < 1000; ++i)
+         {
+           ext ();
+           /* Hack to create high register pressure, so that IRA doesn't
+              collapse this loop into the parent loop.  */
+           d += 1;
+           asm volatile ("// foo" :::
+                         "d0", "d1", "d2", "d3",
+                         "d4", "d5", "d6", "d7",
+                         "d8", "d9", "d10", "d11",
+                         "d12", "d13", "d14", "d15",
+                         "d16", "d17", "d18", "d19",
+                         "d20", "d21", "d22", "d23",
+                         "d24", "d25", "d26", "d27",
+                         "d28", "d29", "d30", "d31");
+         }
+    }
+  *ptr = d;
+}
author	Richard Sandiford <richard.sandiford@arm.com>
	Mon, 10 Jan 2022 14:47:08 +0000 (14:47 +0000)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Mon, 10 Jan 2022 14:47:08 +0000 (14:47 +0000)
gcc/ira-build.c		patch \| blob \| history
gcc/ira-color.c		patch \| blob \| history
gcc/ira-costs.c		patch \| blob \| history
gcc/ira-int.h		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c	[new file with mode: 0644]	patch \| blob