From c464dddccbd8b7cf4fc6cf51126ab559cd34749e Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Tue, 23 Apr 2019 17:59:26 +0000
Subject: [PATCH] [AMDGPU] Fixed addReg() in SIOptimizeExecMaskingPreRA.cpp

The second argument is flags, not subreg.

Differential Revision: https://reviews.llvm.org/D61031

llvm-svn: 359017
---
 .../Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp   |  2 +-
 .../AMDGPU/optimize-negated-cond-exec-masking.mir  | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index c8a1467..6340615 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -246,7 +246,7 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
   MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
                                 TII->get(Andn2Opc), And->getOperand(0).getReg())
                             .addReg(ExecReg)
-                            .addReg(CCReg, CC->getSubReg());
+                            .addReg(CCReg, 0, CC->getSubReg());
   And->eraseFromParent();
   LIS->InsertMachineInstrInMaps(*Andn2);
 
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir
index a9b8d94..4986f51 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir
@@ -463,3 +463,25 @@ body: |
   bb.4:
     S_ENDPGM 0
 ...
+
+# GCN: name: negated_cond_subreg
+# GCN: %0.sub0_sub1:sreg_128 = IMPLICIT_DEF
+# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0.sub0_sub1, implicit-def $scc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+---
+name: negated_cond_subreg
+body: |
+  bb.0:
+    %0.sub0_sub1:sreg_128 = IMPLICIT_DEF
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0.sub0_sub1, implicit $exec
+    %2.sub0_sub1:sreg_128 = V_CMP_NE_U32_e64 %1, 1, implicit $exec
+    $vcc = S_AND_B64 $exec, killed %2.sub0_sub1:sreg_128, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_BRANCH %bb.0
+
+  bb.2:
+    S_ENDPGM 0
+...
-- 
2.7.4