From c74271c5376437fb79f58a33f509babce2ef1e21 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Tue, 26 Mar 2019 21:27:15 +0000 Subject: [PATCH] [LiveRange] Reset the VNIs when splitting subranges When splitting a subrange we end up with two different subranges covering two different, non overlapping, lanes. As part of this splitting the VNIs of the original live-range need to be dispatched to the subranges according to which lanes they are actually defining. Prior to this patch we were assuming that all values were defining all lanes. This was wrong as demonstrated by llvm.org/PR40835. Differential Revision: https://reviews.llvm.org/D59731 llvm-svn: 357032 --- llvm/include/llvm/CodeGen/LiveInterval.h | 9 ++- llvm/lib/CodeGen/LiveInterval.cpp | 53 +++++++++++- llvm/lib/CodeGen/LiveRangeCalc.cpp | 9 ++- llvm/lib/CodeGen/RegisterCoalescer.cpp | 48 ++++++----- llvm/lib/CodeGen/SplitKit.cpp | 9 ++- .../CodeGen/SystemZ/regcoal-subranges-update.mir | 94 ++++++++++++++++++++++ 6 files changed, 189 insertions(+), 33 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/regcoal-subranges-update.mir diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h index 622c124..2cead89 100644 --- a/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/llvm/include/llvm/CodeGen/LiveInterval.h @@ -789,8 +789,15 @@ namespace llvm { /// L000F, refining for mask L0018. Will split the L00F0 lane into /// L00E0 and L0010 and the L000F lane into L0007 and L0008. The Mod /// function will be applied to the L0010 and L0008 subranges. + /// + /// \p Indexes and \p TRI are required to clean up the VNIs that + /// don't defne the related lane masks after they get shrunk. E.g., + /// when L000F gets split into L0007 and L0008 maybe only a subset + /// of the VNIs that defined L000F defines L0007. void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, - std::function Apply); + std::function Apply, + const SlotIndexes &Indexes, + const TargetRegisterInfo &TRI); bool operator<(const LiveInterval& other) const { const SlotIndex &thisIndex = beginIndex(); diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp index 821680e..3dc0303 100644 --- a/llvm/lib/CodeGen/LiveInterval.cpp +++ b/llvm/lib/CodeGen/LiveInterval.cpp @@ -879,8 +879,53 @@ void LiveInterval::clearSubRanges() { SubRanges = nullptr; } -void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator, - LaneBitmask LaneMask, std::function Apply) { +/// For each VNI in \p SR, check whether or not that value defines part +/// of the mask describe by \p LaneMask and if not, remove that value +/// from \p SR. +static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, + LaneBitmask LaneMask, + const SlotIndexes &Indexes, + const TargetRegisterInfo &TRI) { + // Phys reg should not be tracked at subreg level. + // Same for noreg (Reg == 0). + if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg) + return; + // Remove the values that don't define those lanes. + SmallVector ToBeRemoved; + for (VNInfo *VNI : SR.valnos) { + if (VNI->isUnused()) + continue; + // PHI definitions don't have MI attached, so there is nothing + // we can use to strip the VNI. + if (VNI->isPHIDef()) + continue; + const MachineInstr *MI = Indexes.getInstructionFromIndex(VNI->def); + assert(MI && "Cannot find the definition of a value"); + bool hasDef = false; + for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (MOI->getReg() != Reg) + continue; + if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none()) + continue; + hasDef = true; + break; + } + + if (!hasDef) + ToBeRemoved.push_back(VNI); + } + for (VNInfo *VNI : ToBeRemoved) + SR.removeValNo(VNI); + + assert(!SR.empty() && "At least one value should be defined by this mask"); +} + +void LiveInterval::refineSubRanges( + BumpPtrAllocator &Allocator, LaneBitmask LaneMask, + std::function Apply, + const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) { LaneBitmask ToApply = LaneMask; for (SubRange &SR : subranges()) { LaneBitmask SRMask = SR.LaneMask; @@ -898,6 +943,10 @@ void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator, SR.LaneMask = SRMask & ~Matching; // Create a new subrange for the matching part MatchingRange = createSubRangeFrom(Allocator, Matching, SR); + // Now that the subrange is split in half, make sure we + // only keep in the subranges the VNIs that touch the related half. + stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI); + stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI); } Apply(*MatchingRange); ToApply &= ~Matching; diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp index 0020221..d670f28 100644 --- a/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -95,10 +95,11 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { } LI.refineSubRanges(*Alloc, SubMask, - [&MO, this](LiveInterval::SubRange &SR) { - if (MO.isDef()) - createDeadDef(*Indexes, *Alloc, SR, MO); - }); + [&MO, this](LiveInterval::SubRange &SR) { + if (MO.isDef()) + createDeadDef(*Indexes, *Alloc, SR, MO); + }, + *Indexes, TRI); } // Create the def in the main liverange. We do not have to do this if diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 4f10777..40b7e8b 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -924,23 +924,25 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, } SlotIndex AIdx = CopyIdx.getRegSlot(true); LaneBitmask MaskA; + const SlotIndexes &Indexes = *LIS->getSlotIndexes(); for (LiveInterval::SubRange &SA : IntA.subranges()) { VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); assert(ASubValNo != nullptr); MaskA |= SA.LaneMask; - IntB.refineSubRanges(Allocator, SA.LaneMask, - [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB] - (LiveInterval::SubRange &SR) { - VNInfo *BSubValNo = SR.empty() - ? SR.getNextValue(CopyIdx, Allocator) - : SR.getVNInfoAt(CopyIdx); - assert(BSubValNo != nullptr); - auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); - ShrinkB |= P.second; - if (P.first) - BSubValNo->def = ASubValNo->def; - }); + IntB.refineSubRanges( + Allocator, SA.LaneMask, + [&Allocator, &SA, CopyIdx, ASubValNo, + &ShrinkB](LiveInterval::SubRange &SR) { + VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator) + : SR.getVNInfoAt(CopyIdx); + assert(BSubValNo != nullptr); + auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); + ShrinkB |= P.second; + if (P.first) + BSubValNo->def = ASubValNo->def; + }, + Indexes, *TRI); } // Go over all subranges of IntB that have not been covered by IntA, // and delete the segments starting at CopyIdx. This can happen if @@ -3262,16 +3264,18 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, LaneBitmask LaneMask, CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - LI.refineSubRanges(Allocator, LaneMask, - [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) { - if (SR.empty()) { - SR.assign(ToMerge, Allocator); - } else { - // joinSubRegRange() destroys the merged range, so we need a copy. - LiveRange RangeCopy(ToMerge, Allocator); - joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); - } - }); + LI.refineSubRanges( + Allocator, LaneMask, + [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) { + if (SR.empty()) { + SR.assign(ToMerge, Allocator); + } else { + // joinSubRegRange() destroys the merged range, so we need a copy. + LiveRange RangeCopy(ToMerge, Allocator); + joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); + } + }, + *LIS->getSlotIndexes(), *TRI); } bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) { diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index 0bdbdce3..5c944fe 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -520,17 +520,18 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg, .addReg(FromReg, 0, SubIdx); BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); + SlotIndexes &Indexes = *LIS.getSlotIndexes(); if (FirstCopy) { - SlotIndexes &Indexes = *LIS.getSlotIndexes(); Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot(); } else { CopyMI->bundleWithPred(); } LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx); DestLI.refineSubRanges(Allocator, LaneMask, - [Def, &Allocator](LiveInterval::SubRange& SR) { - SR.createDeadDef(Def, Allocator); - }); + [Def, &Allocator](LiveInterval::SubRange &SR) { + SR.createDeadDef(Def, Allocator); + }, + Indexes, TRI); return Def; } diff --git a/llvm/test/CodeGen/SystemZ/regcoal-subranges-update.mir b/llvm/test/CodeGen/SystemZ/regcoal-subranges-update.mir new file mode 100644 index 0000000..b040f27 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/regcoal-subranges-update.mir @@ -0,0 +1,94 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple s390x-ibm-linux -mcpu=z13 -systemz-subreg-liveness -verify-machineinstrs -start-before simple-register-coalescing -stop-after greedy -o - %s | FileCheck %s + +# Check that when we split the live-range with several active lanes +# as part of the live-range update, we correctly eliminate the VNI from +# the relevant part. +# +# In this specific test, the register coalescer will: +# 1. Merge %0 with %1, creating a live-range for the full value subreg_l32 + subreg_h32 +# (actually %0 gets merge with %1 via rematerialization, and technically %0 and %1 +# remain two different live-ranges.) +# 2. Merge %2 with %1 triggering a split into the subreg_l32 + subreg_h32 ranges, since +# %2 only touches subreg_l32. As part of the split the subrange covering subreg_h32 +# must contain only the VNI for the high part (i.e., the one tied with the remaaat of %0). +# This used to be broken and trigger a machine verifier error, because we were not +# clearing the dead value w.r.t. lanes when doing the splitting. I.e., we were ending +# with a subrange referring a value that did not define that lane. +# +# PR40835 +--- +name: main +tracksRegLiveness: true +body: | + bb.0: + + ; CHECK-LABEL: name: main + ; CHECK: [[LGHI:%[0-9]+]]:gr64bit = LGHI 43 + ; CHECK: [[LGHI1:%[0-9]+]]:gr64bit = LGHI 43 + ; CHECK: [[LGHI1]].subreg_l32:gr64bit = MSR [[LGHI1]].subreg_l32, [[LGHI1]].subreg_l32 + ; CHECK: [[LGHI1]].subreg_l32:gr64bit = AHIMux [[LGHI1]].subreg_l32, 9, implicit-def dead $cc + ; CHECK: undef %3.subreg_l64:gr128bit = LGFI -245143785, implicit [[LGHI1]].subreg_l32 + ; CHECK: [[DLGR:%[0-9]+]]:gr128bit = DLGR [[DLGR]], [[LGHI]] + ; CHECK: Return implicit [[DLGR]] + %0:gr64bit = LGHI 43 + %1:gr32bit = COPY %0.subreg_l32 + %1:gr32bit = MSR %1, %1 + %2:gr32bit = COPY killed %1 + %2:gr32bit = AHIMux killed %2, 9, implicit-def dead $cc + undef %3.subreg_l64:gr128bit = LGFI -245143785, implicit killed %2 + %3:gr128bit = DLGR %3:gr128bit, killed %0 + Return implicit killed %3 + +... + +# Make sure the compiler does not choke on VNIs that don't +# an explicit MI as definition. +# In that specific example, this is the PHI not explicitly +# represented for the value carried by %7. +--- +name: segfault +tracksRegLiveness: true +liveins: [] +body: | + ; CHECK-LABEL: name: segfault + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[LGHI:%[0-9]+]]:addr64bit = LGHI 0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: ADJCALLSTACKDOWN 0, 0 + ; CHECK: [[LGFR:%[0-9]+]]:gr64bit = LGFR [[LGHI]].subreg_l32 + ; CHECK: $r2d = LGHI 123 + ; CHECK: $r3d = LGHI 0 + ; CHECK: $r4d = LGHI 0 + ; CHECK: $r5d = COPY [[LGFR]] + ; CHECK: KILL killed $r2d, killed $r3d, killed $r4d, $r5d, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc + ; CHECK: ADJCALLSTACKUP 0, 0 + ; CHECK: [[LGHI]]:addr64bit = nuw nsw LA [[LGHI]], 1, $noreg + ; CHECK: J %bb.1 + bb.0: + successors: %bb.1(0x80000000) + + %2:gr64bit = LGHI 0 + %5:gr64bit = LGHI 123 + %7:addr64bit = COPY %2 + + bb.1: + successors: %bb.1(0x80000000) + + %0:addr64bit = COPY killed %7 + ADJCALLSTACKDOWN 0, 0 + %3:gr32bit = COPY %0.subreg_l32 + %4:gr64bit = LGFR killed %3 + $r2d = COPY %5 + $r3d = COPY %2 + $r4d = COPY %2 + $r5d = COPY killed %4 + KILL killed $r2d, killed $r3d, killed $r4d, killed $r5d, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc + ADJCALLSTACKUP 0, 0 + %1:gr64bit = nuw nsw LA killed %0, 1, $noreg + %7:addr64bit = COPY killed %1 + J %bb.1 + +... -- 2.7.4