From ccb3c8e8613413846d6c2f17cc1c1e2a8b6a98ef Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Wed, 3 Jun 2020 16:15:23 -0700 Subject: [PATCH] [RegisterCoalescer] Update empty subranges when rematerializing When we rematerialize a value as part of the coalescing, we may widen the register class of the destination register. When this happens, updateRegDefUses may create additional subranges to account for the wider register class. The created subranges are empty and if they are not defined by the rematerialized instruction we clean them up. However, if they are defined by the rematerialized instruction but unused, we failed to flag them as dead definition and would leave them as empty live-range. This is wrong because empty live-ranges don't interfere with anything, thus if we don't fix them, we would fail to account that the rematerialized instruction clobbers some lanes. E.g., let us consider the following pseudo code: def.lane_low64:reg128 = ldimm newdef:reg32 = COPY def.lane_low64_low32 When rematerialization happens for newdef, we end up with: newdef.lane_low64:reg128 = ldimm = use newdef.lane_low64_low32 Let's look at the live interval of newdef. Before rematerialization, we would get: newdef [defIdx, useIdx:0) 0@defIdx Right after updateRegDefUses, newdef register class is widen to reg128 and the subrange definitions will be augmented to fill the subreg that is used at the definition point, here lane_low64. The resulting live interval would be: newdef [newDefIdx, useIdx:0) 0@newDefIdx * lane_low64_high32 EMPTY * lane_low64_low32 [newDefIdx, useIdx:0) Before this patch this would be the final status of the live interval. Therefore we miss that lane_low64_high32 is actually live on the definition point of newdef. With this patch, after rematerializing, we check all the added subranges and for the ones that are defined but empty, we flag them as dead def. Thus, in that case, newdef would look like this: newdef [newDefIdx, useIdx:0) 0@newDefIdx * lane_low64_high32 [newDefIdx, newDefIdxDead) ; <-- instead of EMPTY * lane_low64_low32 [newDefIdx, useIdx:0) This fixes https://www.llvm.org/PR46154 --- llvm/lib/CodeGen/RegisterCoalescer.cpp | 11 ++++++ .../SystemZ/regcoal_remat_empty_subrange.ll | 40 ++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 9c1efe6..e24de55 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1445,6 +1445,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx); bool UpdatedSubRanges = false; + SlotIndex DefIndex = + CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); + VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &SR : DstInt.subranges()) { if ((SR.LaneMask & DstMask).none()) { LLVM_DEBUG(dbgs() @@ -1455,6 +1458,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SR.removeValNo(RmValNo); UpdatedSubRanges = true; } + } else { + // We know that this lane is defined by this instruction, + // but at this point it may be empty because it is not used by + // anything. This happens when updateRegDefUses adds the missing + // lanes. Assign that lane a dead def so that the interferences + // are properly modeled. + if (SR.empty()) + SR.createDeadDef(DefIndex, Alloc); } } if (UpdatedSubRanges) diff --git a/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll b/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll new file mode 100644 index 0000000..d863023 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -O3 -pre-RA-sched=list-ilp -systemz-subreg-liveness -stress-sched -terminal-rule %s -o - | FileCheck %s +; This test used to fail because we were creating empty subranges +; instead of subranges with dead defs while rematerializing values +; during coalescing. +; +; PR46154 + +@g_39 = external dso_local unnamed_addr global i64, align 8 +@g_151 = external dso_local global i32, align 4 +@g_222 = external dso_local unnamed_addr global [7 x [10 x i8]], align 2 + +define void @main() { +; CHECK-LABEL: main: +; CHECK: # %bb.0: +; CHECK-NEXT: lhi %r0, 1 +; CHECK-NEXT: larl %r1, g_151 +; CHECK-NEXT: lghi %r3, 0 +; CHECK-NEXT: chi %r0, 0 +; CHECK-NEXT: locghile %r3, 1 +; CHECK-NEXT: o %r0, 0(%r1) +; CHECK-NEXT: dsgfr %r2, %r0 +; CHECK-NEXT: larl %r1, g_222 +; CHECK-NEXT: lghi %r5, 0 +; CHECK-NEXT: stgrl %r2, g_39 +; CHECK-NEXT: stc %r5, 19(%r1) +; CHECK-NEXT: br %r14 + %tmp = load i32, i32* @g_151, align 4 + %tmp3 = or i32 %tmp, 1 + %tmp4 = sext i32 %tmp3 to i64 + %tmp5 = srem i64 0, %tmp4 + %tmp6 = trunc i64 %tmp5 to i8 + store i8 %tmp6, i8* getelementptr inbounds ([7 x [10 x i8]], [7 x [10 x i8]]* @g_222, i64 0, i64 1, i64 9), align 1 + %tmp7 = icmp slt i16 undef, 1 + %tmp8 = zext i1 %tmp7 to i64 + %tmp9 = srem i64 %tmp8, %tmp4 + store i64 %tmp9, i64* @g_39, align 8 + ret void +} + -- 2.7.4