From b3a9b685132a19d3b5252cca6d0b5569c848451c Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Thu, 21 Jun 2018 13:36:08 +0000 Subject: [PATCH] AMDGPU: Add implicit def of SCC to kill and indirect pseudos Summary: Kill instructions sometimes do use SCC in unusual circumstances, when v_cmpx cannot be used due to the operands that are involved. Additionally, even if SCC was never defined by the expansion, kill pseudos could previously occur between an s_cmp and an s_cbranch_scc, which breaks the SCC liveness tracking when the pseudo is expanded to split the basic block. While it would be possible to explicitly mark the SCC as live-in for the successor basic block, it's simpler to just mark the pseudo as defining SCC, so that such a sequence is never emitted by instruction selection in the first place. A similar issue affects indirect source/dest pseudos in principle, although I haven't been able to come up with a test case where it actually matters (this affects instruction selection, so a MIR test can't be used). 
Fixes: dEQP-GLES3.functional.shaders.discard.dynamic_loop_always Change-Id: Ica8d82ecff1a763b892a1112cf1b06c948863a4f Reviewers: arsenm, rampitec Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D47761 llvm-svn: 335223 --- llvm/lib/Target/AMDGPU/SIInstructions.td | 12 ++++++++++-- .../test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir | 2 +- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll | 20 ++++++++++++++++++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index a3b4ea0..87a891b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -292,14 +292,21 @@ def SI_ELSE_BREAK : CFPseudoInstSI < let isReMaterializable = 1; } -let Uses = [EXEC], Defs = [EXEC,VCC] in { +let Uses = [EXEC] in { multiclass PseudoInstKill <dag ins> { + // Even though this pseudo can usually be expanded without an SCC def, we + // conservatively assume that it has an SCC def, both because it is sometimes + // required in degenerate cases (when V_CMPX cannot be used due to constant + // bus limitations) and because it allows us to avoid having to track SCC + // liveness across basic blocks. 
+ let Defs = [EXEC,VCC,SCC] in def _PSEUDO : PseudoInstSI <(outs), ins> { let isConvergent = 1; let usesCustomInserter = 1; } + let Defs = [EXEC,VCC,SCC] in def _TERMINATOR : SPseudoInstSI <(outs), ins> { let isTerminator = 1; } @@ -308,6 +315,7 @@ multiclass PseudoInstKill { defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>; defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>; +let Defs = [EXEC,VCC] in def SI_ILLEGAL_COPY : SPseudoInstSI < (outs unknown:$dst), (ins unknown:$src), [], " ; illegal copy $src to $dst">; @@ -445,7 +453,7 @@ def ADJCALLSTACKDOWN : SPseudoInstSI< let usesCustomInserter = 1; } -let Defs = [M0, EXEC], +let Defs = [M0, EXEC, SCC], UseNamedOperandTable = 1 in { class SI_INDIRECT_SRC : VPseudoInstSI < diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir index 8f034c37e..c842d84 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir @@ -33,7 +33,7 @@ body: | bb.1: successors: %bb.2 $vgpr0 = V_MOV_B32_e32 0, implicit $exec - SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit $exec + SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec S_BRANCH %bb.2 bb.2: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll index fabed79..bd8684d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll @@ -251,6 +251,26 @@ define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) #0 { ret void } +; SI-LABEL: {{^}}test_scc_liveness: +; SI: v_cmp +; SI: s_and_b64 exec +; SI: s_cmp +; SI: s_cbranch_scc +define amdgpu_ps void @test_scc_liveness() #0 { +main_body: + br label %loop3 + +loop3: ; preds = %loop3, %main_body + %tmp = phi i32 [ 0, 
%main_body ], [ %tmp5, %loop3 ] + %tmp1 = icmp sgt i32 %tmp, 0 + call void @llvm.amdgcn.kill(i1 %tmp1) #1 + %tmp5 = add i32 %tmp, 1 + br i1 %tmp1, label %endloop15, label %loop3 + +endloop15: ; preds = %loop3 + ret void +} + declare void @llvm.amdgcn.kill(i1) #0 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare i1 @llvm.amdgcn.wqm.vote(i1) -- 2.7.4