#include "reload.h"
#include "cfgloop.h"
+/* To prevent soft conflict detection becoming quadratic in the
+ loop depth. Only for very pathological cases, so it hardly
+ seems worth a --param. */
+const int max_soft_conflict_loop_depth = 64;
+
typedef struct allocno_hard_regs *allocno_hard_regs_t;
/* The structure contains information about hard registers can be
return nregs;
}
+/* Allocnos A1 and A2 are known to conflict. Check whether, in some loop L
+ that is either the current loop or a nested subloop, the conflict is of
+ the following form:
+
+ - One allocno (X) is a cap allocno for some non-cap allocno X2.
+
+ - X2 belongs to some loop L2.
+
+ - The other allocno (Y) is a non-cap allocno.
+
+ - Y is an ancestor of some allocno Y2 in L2. (Note that such a Y2
+ must exist, given that X and Y conflict.)
+
+ - Y2 is not referenced in L2 (that is, ALLOCNO_NREFS (Y2) == 0).
+
+ - Y can use a different allocation from Y2.
+
+ In this case, Y's register is live across L2 but is not used within it,
+ whereas X's register is used only within L2. The conflict is therefore
+ only "soft", in that it can easily be avoided by spilling Y2 inside L2
+ without affecting any insn references.
+
+ If the conflict does have this form, return the Y2 that would need to be
+ spilled in order to allow X and Y (and thus A1 and A2) to use the same
+ register. Return null otherwise. Returning null is conservatively correct;
+ any nonnnull return value is an optimization. */
+ira_allocno_t
+ira_soft_conflict (ira_allocno_t a1, ira_allocno_t a2)
+{
+ /* Search for the loop L and its associated allocnos X and Y. */
+ int search_depth = 0;
+ while (ALLOCNO_CAP_MEMBER (a1) && ALLOCNO_CAP_MEMBER (a2))
+ {
+ a1 = ALLOCNO_CAP_MEMBER (a1);
+ a2 = ALLOCNO_CAP_MEMBER (a2);
+ if (search_depth++ > max_soft_conflict_loop_depth)
+ return nullptr;
+ }
+ /* This must be true if A1 and A2 conflict. */
+ ira_assert (ALLOCNO_LOOP_TREE_NODE (a1) == ALLOCNO_LOOP_TREE_NODE (a2));
+
+ /* Make A1 the cap allocno (X in the comment above) and A2 the
+ non-cap allocno (Y in the comment above). */
+ if (ALLOCNO_CAP_MEMBER (a2))
+ std::swap (a1, a2);
+ if (!ALLOCNO_CAP_MEMBER (a1))
+ return nullptr;
+
+ /* Search for the real allocno that A1 caps (X2 in the comment above). */
+ do
+ {
+ a1 = ALLOCNO_CAP_MEMBER (a1);
+ if (search_depth++ > max_soft_conflict_loop_depth)
+ return nullptr;
+ }
+ while (ALLOCNO_CAP_MEMBER (a1));
+
+ /* Find the associated allocno for A2 (Y2 in the comment above). */
+ auto node = ALLOCNO_LOOP_TREE_NODE (a1);
+ auto local_a2 = node->regno_allocno_map[ALLOCNO_REGNO (a2)];
+
+ /* Find the parent of LOCAL_A2/Y2. LOCAL_A2 must be a descendant of A2
+ for the conflict query to make sense, so this parent lookup must succeed.
+
+ If the parent allocno has no references, it is usually cheaper to
+ spill at that loop level instead. Keep searching until we find
+ a parent allocno that does have references (but don't look past
+ the starting allocno). */
+ ira_allocno_t local_parent_a2;
+ for (;;)
+ {
+ local_parent_a2 = ira_parent_allocno (local_a2);
+ if (local_parent_a2 == a2 || ALLOCNO_NREFS (local_parent_a2) != 0)
+ break;
+ local_a2 = local_parent_a2;
+ }
+ if (CHECKING_P)
+ {
+ /* Sanity check to make sure that the conflict we've been given
+ makes sense. */
+ auto test_a2 = local_parent_a2;
+ while (test_a2 != a2)
+ {
+ test_a2 = ira_parent_allocno (test_a2);
+ ira_assert (test_a2);
+ }
+ }
+ if (local_a2
+ && ALLOCNO_NREFS (local_a2) == 0
+ && ira_subloop_allocnos_can_differ_p (local_parent_a2))
+ return local_a2;
+ return nullptr;
+}
+
+/* The caller has decided to allocate HREGNO to A and has proved that
+ this is safe. However, the allocation might require the kind of
+ spilling described in the comment above ira_soft_conflict.
+ The caller has recorded that:
+
+ - The allocnos in ALLOCNOS_TO_SPILL are the ones that would need
+ to be spilled to satisfy soft conflicts for at least one allocation
+ (not necessarily HREGNO).
+
+ - The soft conflicts apply only to A allocations that overlap
+ SOFT_CONFLICT_REGS.
+
+ If allocating HREGNO is subject to any soft conflicts, record the
+ subloop allocnos that need to be spilled. */
+static void
+spill_soft_conflicts (ira_allocno_t a, bitmap allocnos_to_spill,
+ HARD_REG_SET soft_conflict_regs, int hregno)
+{
+ auto nregs = hard_regno_nregs (hregno, ALLOCNO_MODE (a));
+ bitmap_iterator bi;
+ unsigned int i;
+ EXECUTE_IF_SET_IN_BITMAP (allocnos_to_spill, 0, i, bi)
+ {
+ /* SPILL_A needs to be spilled for at least one allocation
+ (not necessarily this one). */
+ auto spill_a = ira_allocnos[i];
+
+ /* Find the corresponding allocno for this loop. */
+ auto conflict_a = spill_a;
+ do
+ {
+ conflict_a = ira_parent_or_cap_allocno (conflict_a);
+ ira_assert (conflict_a);
+ }
+ while (ALLOCNO_LOOP_TREE_NODE (conflict_a)->level
+ > ALLOCNO_LOOP_TREE_NODE (a)->level);
+
+ ira_assert (ALLOCNO_LOOP_TREE_NODE (conflict_a)
+ == ALLOCNO_LOOP_TREE_NODE (a));
+
+ if (conflict_a == a)
+ {
+ /* SPILL_A is a descendant of A. We don't know (and don't need
+ to know) which cap allocnos have a soft conflict with A.
+ All we need to do is test whether the soft conflict applies
+ to the chosen allocation. */
+ if (ira_hard_reg_set_intersection_p (hregno, ALLOCNO_MODE (a),
+ soft_conflict_regs))
+ ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (spill_a) = true;
+ }
+ else
+ {
+ /* SPILL_A is a descendant of CONFLICT_A, which has a soft conflict
+ with A. Test whether the soft conflict applies to the current
+ allocation. */
+ ira_assert (ira_soft_conflict (a, conflict_a) == spill_a);
+ auto conflict_hregno = ALLOCNO_HARD_REGNO (conflict_a);
+ ira_assert (conflict_hregno >= 0);
+ auto conflict_nregs = hard_regno_nregs (conflict_hregno,
+ ALLOCNO_MODE (conflict_a));
+ if (hregno + nregs > conflict_hregno
+ && conflict_hregno + conflict_nregs > hregno)
+ ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (spill_a) = true;
+ }
+ }
+}
+
/* Choose a hard register for allocno A. If RETRY_P is TRUE, it means
that the function called from function
`ira_reassign_conflict_allocnos' and `allocno_reload_assign'. In
#ifdef STACK_REGS
bool no_stack_reg_p;
#endif
+ auto_bitmap allocnos_to_spill;
+ HARD_REG_SET soft_conflict_regs = {};
ira_assert (! ALLOCNO_ASSIGNED_P (a));
get_conflict_and_start_profitable_regs (a, retry_p,
mode = ALLOCNO_MODE (conflict_a);
conflict_nregs = hard_regno_nregs (hard_regno, mode);
- if (conflict_nregs == n_objects && conflict_nregs > 1)
+ auto spill_a = (retry_p
+ ? nullptr
+ : ira_soft_conflict (a, conflict_a));
+ if (spill_a)
{
- int num = OBJECT_SUBWORD (conflict_obj);
-
- if (REG_WORDS_BIG_ENDIAN)
- SET_HARD_REG_BIT (conflicting_regs[word],
- hard_regno + n_objects - num - 1);
- else
- SET_HARD_REG_BIT (conflicting_regs[word],
- hard_regno + num);
+ if (bitmap_set_bit (allocnos_to_spill,
+ ALLOCNO_NUM (spill_a)))
+ {
+ ira_loop_border_costs border_costs (spill_a);
+ auto cost = border_costs.spill_inside_loop_cost ();
+ auto note_conflict = [&](int r)
+ {
+ SET_HARD_REG_BIT (soft_conflict_regs, r);
+ auto hri = ira_class_hard_reg_index[aclass][r];
+ if (hri >= 0)
+ {
+ costs[hri] += cost;
+ full_costs[hri] += cost;
+ }
+ };
+ for (int r = hard_regno;
+ r >= 0 && (int) end_hard_regno (mode, r) > hard_regno;
+ r--)
+ note_conflict (r);
+ for (int r = hard_regno + 1;
+ r < hard_regno + conflict_nregs;
+ r++)
+ note_conflict (r);
+ }
}
else
- conflicting_regs[word]
- |= ira_reg_mode_hard_regset[hard_regno][mode];
- if (hard_reg_set_subset_p (profitable_hard_regs,
- conflicting_regs[word]))
- goto fail;
+ {
+ if (conflict_nregs == n_objects && conflict_nregs > 1)
+ {
+ int num = OBJECT_SUBWORD (conflict_obj);
+
+ if (REG_WORDS_BIG_ENDIAN)
+ SET_HARD_REG_BIT (conflicting_regs[word],
+ hard_regno + n_objects - num - 1);
+ else
+ SET_HARD_REG_BIT (conflicting_regs[word],
+ hard_regno + num);
+ }
+ else
+ conflicting_regs[word]
+ |= ira_reg_mode_hard_regset[hard_regno][mode];
+ if (hard_reg_set_subset_p (profitable_hard_regs,
+ conflicting_regs[word]))
+ goto fail;
+ }
}
}
else if (! retry_p
{
for (i = hard_regno_nregs (best_hard_regno, mode) - 1; i >= 0; i--)
allocated_hardreg_p[best_hard_regno + i] = true;
+ spill_soft_conflicts (a, allocnos_to_spill, soft_conflict_regs,
+ best_hard_regno);
}
if (! retry_p)
restore_costs_from_copies (a);
assigning hard register to allocno A even without spilling
conflicting allocnos. */
continue;
+ auto_bitmap allocnos_to_spill;
+ HARD_REG_SET soft_conflict_regs = {};
mode = ALLOCNO_MODE (a);
nwords = ALLOCNO_NUM_OBJECTS (a);
/* Process each allocno conflicting with A and update the cost
ALLOCNO_COLOR_DATA (conflict_a)->temp = check;
if ((conflict_hregno = ALLOCNO_HARD_REGNO (conflict_a)) < 0)
continue;
- spill_cost = ALLOCNO_UPDATED_MEMORY_COST (conflict_a);
- k = (ira_class_hard_reg_index
- [ALLOCNO_CLASS (conflict_a)][conflict_hregno]);
- ira_assert (k >= 0);
- if ((allocno_costs = ALLOCNO_HARD_REG_COSTS (conflict_a))
- != NULL)
- spill_cost -= allocno_costs[k];
+ auto spill_a = ira_soft_conflict (a, conflict_a);
+ if (spill_a)
+ {
+ if (!bitmap_set_bit (allocnos_to_spill,
+ ALLOCNO_NUM (spill_a)))
+ continue;
+ ira_loop_border_costs border_costs (spill_a);
+ spill_cost = border_costs.spill_inside_loop_cost ();
+ }
else
- spill_cost -= ALLOCNO_UPDATED_CLASS_COST (conflict_a);
- spill_cost
- += allocno_copy_cost_saving (conflict_a, conflict_hregno);
+ {
+ spill_cost = ALLOCNO_UPDATED_MEMORY_COST (conflict_a);
+ k = (ira_class_hard_reg_index
+ [ALLOCNO_CLASS (conflict_a)][conflict_hregno]);
+ ira_assert (k >= 0);
+ if ((allocno_costs = ALLOCNO_HARD_REG_COSTS (conflict_a))
+ != NULL)
+ spill_cost -= allocno_costs[k];
+ else
+ spill_cost -= ALLOCNO_UPDATED_CLASS_COST (conflict_a);
+ spill_cost
+ += allocno_copy_cost_saving (conflict_a, conflict_hregno);
+ }
conflict_nregs = hard_regno_nregs (conflict_hregno,
ALLOCNO_MODE (conflict_a));
+ auto note_conflict = [&](int r)
+ {
+ if (check_hard_reg_p (a, r,
+ conflicting_regs, profitable_hard_regs))
+ {
+ if (spill_a)
+ SET_HARD_REG_BIT (soft_conflict_regs, r);
+ costs[r] += spill_cost;
+ }
+ };
for (r = conflict_hregno;
r >= 0 && (int) end_hard_regno (mode, r) > conflict_hregno;
r--)
- if (check_hard_reg_p (a, r,
- conflicting_regs, profitable_hard_regs))
- costs[r] += spill_cost;
+ note_conflict (r);
for (r = conflict_hregno + 1;
r < conflict_hregno + conflict_nregs;
r++)
- if (check_hard_reg_p (a, r,
- conflicting_regs, profitable_hard_regs))
- costs[r] += spill_cost;
+ note_conflict (r);
}
}
min_cost = INT_MAX;
by spilling some conflicting allocnos does not improve the
allocation cost. */
continue;
+ spill_soft_conflicts (a, allocnos_to_spill, soft_conflict_regs, best);
nregs = hard_regno_nregs (best, mode);
/* Now spill conflicting allocnos which contain a hard register
of A when we assign the best chosen hard register to it. */
--- /dev/null
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#define PROB 0.1
+
+struct L
+{
+ int data;
+ volatile struct L *next;
+ volatile struct L *inner;
+};
+
+/* The thing we're testing here is that the !head->inner path of the outer loop
+ body has no stack accesses. It's possible that we'll need to update this
+ pattern for unrelated code changes. but the test should be XFAILed rather
+ than changed if any new stack accesses occur on the !head->inner path. */
+/*
+** foo:
+** ...
+** ldr (w[0-9]+), \[(x[0-9]+)\]
+** add (w[0-9]+), (?:\3, \1|\1, \3)
+** ldr (x[0-9]+), \[\2, #?16\]
+** str \3, \[\2\]
+** ldr \2, \[\2, #?8\]
+** cbn?z \4, .*
+** ...
+** ret
+*/
+void
+foo (volatile struct L *head, int inc)
+{
+ while (head)
+ {
+ /* Clobber all call-preserved GPRs, so that the loop has to use
+ call-clobbered GPRs if it is to avoid spilling. */
+ asm volatile ("" :::
+ "x19", "x20", "x21", "x22", "x23",
+ "x24", "x25", "x26", "x27", "x28");
+ inc = head->data + inc;
+ volatile struct L *inner = head->inner;
+ head->data = inc;
+ head = head->next;
+ if (__builtin_expect_with_probability (inner != 0, 0, PROB))
+ for (int i = 0; i < 1000; ++i)
+ asm volatile ("" :: /* example allocation: */
+ "r" (i), /* x0 */
+ "r" (inner), /* x1 */
+ "r" (inner->next), /* x2 */
+ "r" (inner->next), /* x3 */
+ "r" (inner->next), /* x4 */
+ "r" (inner->next), /* x5 */
+ "r" (inner->next), /* x6 */
+ "r" (inner->next), /* x7 */
+ "r" (inner->next), /* x8 */
+ "r" (inner->next), /* x9 */
+ "r" (inner->next), /* x10 */
+ "r" (inner->next), /* x11 */
+ "r" (inner->next), /* x12 */
+ "r" (inner->next), /* x13 */
+ "r" (inner->next), /* x14 */
+ "r" (inner->next), /* x15 */
+ "r" (inner->next), /* x16 */
+ "r" (inner->next), /* x17 */
+ "r" (inner->next), /* x18 */
+ "r" (inner->next) : /* x30 */
+ "x19", "x20", "x21", "x22", "x23",
+ "x24", "x25", "x26", "x27", "x28");
+ }
+}