From 497498c878d48754318e486428e2aa30854020b9 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 9 Mar 2020 19:42:57 +0000 Subject: [PATCH] lra: Tighten check for reloading paradoxical subregs [PR94052] simplify_operand_subreg tries to detect whether the allocation for a pseudo in a paradoxical subreg is also valid for the outer mode. The condition it used to check for an invalid combination was: else if (REG_P (reg) && REGNO (reg) >= FIRST_PSEUDO_REGISTER && (hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0 && (hard_regno_nregs (hard_regno, innermode) < hard_regno_nregs (hard_regno, mode)) && (regclass = lra_get_allocno_class (REGNO (reg))) && (type != OP_IN || !in_hard_reg_set_p (reg_class_contents[regclass], mode, hard_regno) || overlaps_hard_reg_set_p (lra_no_alloc_regs, mode, hard_regno))) I think there are two problems with this: (1) It never actually checks whether the hard register is valid for the outer mode (in the hard_regno_mode_ok sense). If it isn't, any attempt to reload in the outer mode is likely to cycle, because the implied regno/mode combination will be just as invalid next time curr_insn_transform sees the subreg. (2) The check is valid for little-endian only. For big-endian we need to move hard_regno backwards. Using simplify_subreg_regno should avoid both problems. As the existing comment says, IRA should always take subreg references into account when allocating hard registers, so this fix-up should only really be needed for pseudos allocated by LRA itself. gcc/ 2020-03-21 Richard Sandiford PR rtl-optimization/94052 * lra-constraints.c (simplify_operand_subreg): Reload the inner register of a paradoxical subreg if simplify_subreg_regno fails to give a valid hard register for the outer mode. gcc/testsuite/ 2020-03-21 Tamar Christina PR target/94052 * g++.target/aarch64/pr94052.C: New test. 
--- gcc/ChangeLog | 7 ++ gcc/lra-constraints.c | 24 ++-- gcc/testsuite/ChangeLog | 5 + gcc/testsuite/g++.target/aarch64/pr94052.C | 174 +++++++++++++++++++++++++++++ 4 files changed, 200 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/g++.target/aarch64/pr94052.C diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c5d3fb9..e55a8e7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2020-03-21 Richard Sandiford + + PR rtl-optimization/94052 + * lra-constraints.c (simplify_operand_subreg): Reload the inner + register of a paradoxical subreg if simplify_subreg_regno fails + to give a valid hard register for the outer mode. + 2020-03-20 Martin Jambor PR tree-optimization/93435 diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index f71e0c9..bf6d4a2 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -1489,7 +1489,7 @@ static bool process_address (int, bool, rtx_insn **, rtx_insn **); static bool simplify_operand_subreg (int nop, machine_mode reg_mode) { - int hard_regno; + int hard_regno, inner_hard_regno; rtx_insn *before, *after; machine_mode mode, innermode; rtx reg, new_reg; @@ -1735,15 +1735,19 @@ simplify_operand_subreg (int nop, machine_mode reg_mode) for the new uses. 
*/ else if (REG_P (reg) && REGNO (reg) >= FIRST_PSEUDO_REGISTER - && (hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0 - && (hard_regno_nregs (hard_regno, innermode) - < hard_regno_nregs (hard_regno, mode)) - && (regclass = lra_get_allocno_class (REGNO (reg))) - && (type != OP_IN - || !in_hard_reg_set_p (reg_class_contents[regclass], - mode, hard_regno) - || overlaps_hard_reg_set_p (lra_no_alloc_regs, - mode, hard_regno))) + && paradoxical_subreg_p (operand) + && (inner_hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0 + && ((hard_regno + = simplify_subreg_regno (inner_hard_regno, innermode, + SUBREG_BYTE (operand), mode)) < 0 + || ((hard_regno_nregs (inner_hard_regno, innermode) + < hard_regno_nregs (hard_regno, mode)) + && (regclass = lra_get_allocno_class (REGNO (reg))) + && (type != OP_IN + || !in_hard_reg_set_p (reg_class_contents[regclass], + mode, hard_regno) + || overlaps_hard_reg_set_p (lra_no_alloc_regs, + mode, hard_regno))))) { /* The class will be defined later in curr_insn_transform. */ enum reg_class rclass diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ce99c98..bff5d821 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-03-21 Tamar Christina + + PR target/94052 + * gcc.target/aarch64/pr94052.C: New test. 
+ 2020-03-20 Martin Jambor PR tree-optimization/93435 diff --git a/gcc/testsuite/g++.target/aarch64/pr94052.C b/gcc/testsuite/g++.target/aarch64/pr94052.C new file mode 100644 index 0000000..d36c9bd --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/pr94052.C @@ -0,0 +1,174 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2 -std=gnu++11 -w" } */ + +namespace c { +typedef int d; +template struct f { typedef e g; }; +template struct h; +template e aa(typename f::g i) { return i; } +template struct j {}; +template struct k; +template struct k<1, j> { typedef m g; }; +template typename k>::g ab(j); +} // namespace c +typedef long d; +typedef char o; +typedef int p; +typedef char q; +typedef int r; +namespace { +struct s; +constexpr d t = 6; +template class ad { +public: + static constexpr d u = t; + d v(); + d x(); + d y(); +}; +class z : ad {}; +struct ae { + p af; +}; +class ag { +public: + ae ah(); +}; +} // namespace +typedef __Int32x4_t ai; +typedef struct { + ai aj[2]; +} ak; +typedef int al; +void am(p *a, ai b) { __builtin_aarch64_st1v4si(a, b); } +namespace an { +class ao { +public: + bool operator==(ao); + d v(); + d x(); +}; +class ap : public ad {}; +class aq { +public: + c::j ar(); + int as(); + int at(); +}; +class au { +public: + virtual d av(d); + virtual ap aw(); + virtual ag ax(); +}; +class ay {}; +class az { + virtual void ba(const ay &, const s &); +}; +using bb = az; +class bc; +class bd : bb { + void ba(const ay &, const s &); + bc *be; + bc *bf; + bc *bg; + aq bh; + int bi; + int bj; + ao bk; +}; +namespace bl { +namespace bm { +namespace bn { +class bo; +} +} // namespace bm +} // namespace bl +namespace bn { +template > +ai bp(ac *, ac *, ac *, al, al, al, d, p); +template > +ak bq(ac *br, ac *bs, ac *bt, al bu, al bv, al bw, d bx, int, int by) { + ak{bp(br, bs, bt, bu, bv, bw, bx, by), bp(br, bs, bt, bu, bv, bw, bx, by)}; +} +template > +ak bz(ac *, ac *, ac *, al, al, al &, int, p); +template void ca(p *, const ak &); +template 
<> void ca<1>(p *buffer, const ak &cb) { + am(buffer, cb.aj[0]); + am(buffer + 4, cb.aj[1]); +} +int cc(int, int); +} // namespace bn +class bc { +public: + virtual au *cd(); +}; +class ce { +public: + q *cf(); +}; +template struct cg { + template static void ci(ay, z cj, ch ck) { ck(cj); } +}; +template void cl(ay w, ch ck) { + z cj; + cg::ci(w, cj, c::aa(ck)); +} +namespace { +template class co { +public: + static void convolve(ay, int cs, bc *cp, bc *cq, bc *cr, aq cw, int, ao ct) { + int by = cp->cd()->ax().ah().af; + int cu = cq->cd()->ax().ah().af; + cp->cd()->aw().v(); + int cv = cp->cd()->aw().x(); + cp->cd()->aw().y(); + cp->cd()->aw(); + int da = cr->cd()->aw().x(); + int cx = cq->cd()->aw().x(); + cq->cd()->aw().y(); + int cy = cr->cd()->av(0); + int cz = cr->cd()->av(1); + bn::cc(cs, cn); + int de = c::ab<1>(cw.ar()); + cw.as(); + cw.at(); + ay db; + ce dc; + ce dd; + ce w; + q *di = w.cf(); + cl(db, [&](z) { + int df; + dc; + di; + cx; + auto dg(cu); + auto dh(cu); + auto dl(cu); + for (; cz; df += de) { + auto br = reinterpret_cast(cv); + auto bs = reinterpret_cast(cv); + auto bt = reinterpret_cast(df * ct.x()); + auto dj = reinterpret_cast(dd.cf() + da); + for (int dk; dk < cy; dk += cs, dj += cs) + if (ct == ao()) { + auto vres = bn::bz(br, bs, bt, dg, dh, dl, cn, by); + bn::ca(dj, vres); + } else + bn::bq(br, bs, bt, dg, dh, dl, ct.v(), cn, by); + } + }); + } +}; +template +void bz(ay dm, int cs, bc *cp, bc *cq, bc *cr, aq cw, int dn, ao ct) { + co::convolve(dm, cs, cp, cq, cr, cw, dn, ct); + co::convolve(dm, cs, cp, cq, cr, cw, dn, ct); +} +} // namespace +void bd::ba(const ay &dm, const s &) { + bz(dm, bi, be, bg, bf, bh, bj, bk); +} +} // namespace an -- 2.7.4