From 1a6aa8b1952b289c99adb83309ee7bc55b12fa97 Mon Sep 17 00:00:00 2001 From: Thomas Symalla Date: Thu, 31 Mar 2022 12:23:38 +0200 Subject: [PATCH] [AMDGPU] Add missing use check in SIOptimizeExecMasking pass. Whenever a v_cmp, s_and_saveexec instruction sequence shall be transformed to an equivalent s_mov, v_cmpx sequence, it needs to be detected if the v_cmp target register is used between the two instructions as the v_cmp result gets omitted by using the v_cmpx instruction, resulting in invalid code. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D122797 --- llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 40 ++++++++++++++++------ .../test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir | 21 ++++++++++++ 2 files changed, 51 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index e6e948c..4c61892 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -322,26 +322,44 @@ findInstrBackwards(MachineInstr &Origin, return nullptr; } + // Determine if a register Reg is not re-defined and still in use -// in the range (Stop..BB.end]. +// in the range (Stop..Start]. // It does so by backwards calculating liveness from the end of the BB until // either Stop or the beginning of the BB is reached. // After liveness is calculated, we can determine if Reg is still in use and not // defined inbetween the instructions. -static bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg, - const SIRegisterInfo *TRI, - MachineRegisterInfo &MRI) { +static bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start, + MCRegister Reg, const SIRegisterInfo *TRI, + MachineRegisterInfo &MRI, + bool useLiveOuts = false, + bool ignoreStart = false) { LivePhysRegs LR(*TRI); - LR.addLiveOuts(*Stop.getParent()); + if (useLiveOuts) + LR.addLiveOuts(*Stop.getParent()); + + MachineBasicBlock::reverse_iterator A(Start); + MachineBasicBlock::reverse_iterator E(Stop); + + if (ignoreStart) + ++A; - for (auto A = Stop.getParent()->rbegin(); - A != Stop.getParent()->rend() && A != Stop; ++A) { + for (; A != Stop.getParent()->rend() && A != Stop; ++A) { LR.stepBackward(*A); } return !LR.available(MRI, Reg); } +// Determine if a register Reg is not re-defined and still in use +// in the range (Stop..BB.end]. +static bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg, + const SIRegisterInfo *TRI, + MachineRegisterInfo &MRI) { + return isRegisterInUseBetween(Stop, *Stop.getParent()->rbegin(), Reg, TRI, + MRI, true); +} + // Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec sequence // by looking at an instance of a s_and_saveexec instruction. Returns a pointer // to the v_cmp instruction if it is safe to replace the sequence (see the @@ -396,9 +414,11 @@ static MachineInstr *findPossibleVCMPVCMPXOptimization( if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) return nullptr; - // If the v_cmp target is in use after the s_and_saveexec, skip the - // optimization. - if (isRegisterInUseAfter(SaveExec, VCmpDest->getReg(), TRI, MRI)) + // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the + // s_and_saveexec, skip the optimization. + if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), TRI, MRI, + false, true) || + isRegisterInUseAfter(SaveExec, VCmpDest->getReg(), TRI, MRI)) return nullptr; // Try to determine if there is a write to any of the VCmp diff --git a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir index ecebd6f..fceca41 100644 --- a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir +++ b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir @@ -25,6 +25,27 @@ body: | ... --- + +# Ensure the transformation does not get applied when the v_cmp target is used before the s_and_saveexec instruction. + +# GCN-LABEL: name: vcmp_saveexec_to_mov_vcmpx_exec_intermediate_use +# GCN: V_CMP_LT_F32_e64 +# GCN: V_WRITELANE_B32 +# GCN: S_AND_SAVEEXEC_B64 +name: vcmp_saveexec_to_mov_vcmpx_exec_intermediate_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr2 + renamable $sgpr0_sgpr1 = V_CMP_LT_F32_e64 0, 953267991, 2, $vgpr1, 0, implicit $mode, implicit $exec + $vgpr0 = V_WRITELANE_B32 0, $sgpr0, $vgpr0 + $sgpr2_sgpr3 = COPY $exec, implicit-def $exec + $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3 +... + +--- + # Check if the modifiers are preserved when generating the V_CMPX instruction. # GCN-LABEL: name: vcmp_saveexec_to_mov_vcmpx_check_abs -- 2.7.4