From 370aa2f88ffabae5831bbc350c03d7dcc757580b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 15 Jun 2022 17:04:05 -0400 Subject: [PATCH] InlineSpiller: Don't fold spills into undef reads This was producing a load into a dead register which was a verifier error. --- llvm/lib/CodeGen/InlineSpiller.cpp | 7 ++ .../test/CodeGen/AMDGPU/fold-restore-undef-use.mir | 92 ++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 4cf7c20..06c6608 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -838,6 +838,13 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, unsigned Idx = OpPair.second; assert(MI == OpPair.first && "Instruction conflict during operand folding"); MachineOperand &MO = MI->getOperand(Idx); + + // No point restoring an undef read, and we'll produce an invalid live + // interval. + // TODO: Is this really the correct way to handle undef tied uses? 
+ if (MO.isUse() && !MO.readsReg() && !MO.isTied()) + continue; + if (MO.isImplicit()) { ImpReg = MO.getReg(); continue; diff --git a/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir b/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir new file mode 100644 index 0000000..3616d61 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir @@ -0,0 +1,92 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -stress-regalloc=4 -verify-regalloc -start-before=greedy,0 -stop-after=virtregrewriter,0 %s -o - | FileCheck %s + +# Check that we don't generate *** Bad machine code: Instruction loads +# from dead spill slot *** + + +--- +name: restore_undef_copy_use +tracksRegLiveness: true +machineFunctionInfo: + maxKernArgAlign: 1 + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + occupancy: 8 +body: | + ; CHECK-LABEL: name: restore_undef_copy_use + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 undef renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: $sgpr6_sgpr7 = KILL undef renamable $sgpr6_sgpr7 + ; CHECK-NEXT: dead 
$sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: liveins: $sgpr10_sgpr11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr10_sgpr11, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16 + + %0:sgpr_64 = COPY $sgpr10_sgpr11 + S_CBRANCH_EXECZ %bb.3, implicit $exec + + bb.1: + %1:sreg_64 = S_OR_SAVEEXEC_B64 undef %2:sreg_64, implicit-def $exec, implicit-def $scc, implicit $exec + $exec = S_XOR_B64_term $exec, %1, implicit-def $scc + S_CBRANCH_EXECZ %bb.5, implicit $exec + + bb.2: + ADJCALLSTACKUP 0, 0, 
implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; A restore for %0 should not be inserted here. + $sgpr6_sgpr7 = COPY undef %0 + dead $sgpr30_sgpr31 = SI_CALL undef %1, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + S_BRANCH %bb.5 + + bb.3: + + bb.4: + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + dead $sgpr30_sgpr31 = SI_CALL undef %3:sreg_64_xexec, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + $sgpr4_sgpr5 = COPY %0 + dead $sgpr30_sgpr31 = SI_CALL undef %3:sreg_64_xexec, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + + bb.5: + +... + -- 2.7.4