From 67cfefebbbbb3a5923c47c31293a8f76596de8be Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Thu, 6 May 2021 20:27:03 +0900 Subject: [PATCH] [AMDGPU] Fix WQM failure with single block inactive demote The instruction test for an inactive kill/demote needs to be based on the actual opcode, not on whether the instruction would be lowered to a demote. Reviewed By: piotr Differential Revision: https://reviews.llvm.org/D101966 --- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 2 +- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll | 38 ++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 51a019b..38548ea 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -923,7 +923,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB, } else { // Static: kill does nothing MachineInstr *NewTerm = nullptr; - if (IsDemote) { + if (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) { LIS->RemoveMachineInstrFromMaps(MI); } else { assert(MBB.succ_size() == 1); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll index e88b70f..cff597b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll @@ -1184,6 +1184,44 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index ret void } +define amdgpu_ps void @static_exact_nop(float %arg0, float %arg1) { +; SI-LABEL: static_exact_nop: +; SI: ; %bb.0: ; %.entry +; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0 +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc +; SI-NEXT: exp mrt1 v0, v0, v0, v0 done vm +; SI-NEXT: s_endpgm +; +; GFX9-LABEL: static_exact_nop: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc +; GFX9-NEXT: exp mrt1 v0, v0, v0, v0 done vm +; GFX9-NEXT: s_endpgm +; +; 
GFX10-32-LABEL: static_exact_nop: +; GFX10-32: ; %bb.0: ; %.entry +; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0 +; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo +; GFX10-32-NEXT: exp mrt1 v0, v0, v0, v0 done vm +; GFX10-32-NEXT: s_endpgm +; +; GFX10-64-LABEL: static_exact_nop: +; GFX10-64: ; %bb.0: ; %.entry +; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0 +; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc +; GFX10-64-NEXT: exp mrt1 v0, v0, v0, v0 done vm +; GFX10-64-NEXT: s_endpgm +.entry: + %c0 = fcmp olt float %arg0, 0.000000e+00 + %c1 = fcmp oge float %arg1, 0.0 + call void @llvm.amdgcn.wqm.demote(i1 true) + %tmp1 = select i1 %c0, float 1.000000e+00, float 0.000000e+00 + call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0 + ret void +} + + declare void @llvm.amdgcn.wqm.demote(i1) #0 declare i1 @llvm.amdgcn.live.mask() #0 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 -- 2.7.4