From 76b7d3432e38bb7690c3bbd4940786b5cb751b95 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 12 Jul 2021 12:27:34 -0700 Subject: [PATCH] [AMDGPU] Add TII::isIgnorableUse() to allow VOP rematerialization Any def of EXEC prevents rematerialization of any VOP instruction because of the physreg use. Create a callback to check if the physreg use can be ignored to allow rematerialization. Differential Revision: https://reviews.llvm.org/D105836 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 6 ++++++ llvm/lib/CodeGen/LiveRangeEdit.cpp | 5 +++-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 ++++++ llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 ++ llvm/test/CodeGen/AMDGPU/remat-sop.mir | 29 +++++++++++++++++++++++++++++ llvm/test/CodeGen/AMDGPU/remat-vop.mir | 25 +++++++++++++++++++++++++ 6 files changed, 71 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 21758e0..5c45cea 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -129,6 +129,12 @@ public: isReallyTriviallyReMaterializableGeneric(MI, AA))); } + /// Given \p MO is a PhysReg use return if it can be ignored for the purpose + /// of instruction rematerialization. + virtual bool isIgnorableUse(const MachineOperand &MO) const { + return false; + } + protected: /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is /// set, this hook lets the target specify whether the instruction is actually diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index c5d1734..64a2dd2 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -113,9 +113,10 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) continue; - // We can't remat physreg uses, unless it is a constant. 
+ // We can't remat physreg uses, unless it is a constant or target wants + // to ignore this use. if (Register::isPhysicalRegister(MO.getReg())) { - if (MRI.isConstantPhysReg(MO.getReg())) + if (MRI.isConstantPhysReg(MO.getReg()) || TII.isIgnorableUse(MO)) continue; return false; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d0f5b2d..af276c6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -122,6 +122,12 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, return false; } +bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const { + // Any implicit use of exec by VALU is not a real register read. + return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() && + isVALU(*MO.getParent()); +} + bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 768cfd8..e55774b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -181,6 +181,8 @@ public: bool isReallyTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA) const override; + bool isIgnorableUse(const MachineOperand &MO) const override; + bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override; diff --git a/llvm/test/CodeGen/AMDGPU/remat-sop.mir b/llvm/test/CodeGen/AMDGPU/remat-sop.mir index 051f5ad..ed799bf 100644 --- a/llvm/test/CodeGen/AMDGPU/remat-sop.mir +++ b/llvm/test/CodeGen/AMDGPU/remat-sop.mir @@ -23,6 +23,35 @@ body: | S_ENDPGM 0 ... 
--- +name: test_no_remat_s_mov_b32_impuse_exec +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_no_remat_s_mov_b32_impuse_exec + ; GCN: $exec = IMPLICIT_DEF + ; GCN: renamable $sgpr0 = S_MOV_B32 1, implicit $exec + ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5) + ; GCN: renamable $sgpr1 = S_MOV_B32 2, implicit $exec + ; GCN: renamable $sgpr0 = S_MOV_B32 3, implicit $exec + ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5) + ; GCN: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $sgpr0 + ; GCN: S_NOP 0, implicit killed renamable $sgpr1 + ; GCN: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5) + ; GCN: S_NOP 0, implicit killed renamable $sgpr0 + ; GCN: S_ENDPGM 0 + $exec = IMPLICIT_DEF + %0:sreg_32 = S_MOV_B32 1, implicit $exec + %1:sreg_32 = S_MOV_B32 2, implicit $exec + %2:sreg_32 = S_MOV_B32 3, implicit $exec + S_NOP 0, implicit %0 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_ENDPGM 0 +... +--- name: test_remat_s_mov_b64 tracksRegLiveness: true body: | diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir index 1de8efe..2be2705 100644 --- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir +++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir @@ -52,6 +52,31 @@ body: | S_ENDPGM 0 ... 
--- +name: test_remat_v_mov_b32_e32_exec_def +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; GCN-LABEL: name: test_remat_v_mov_b32_e32_exec_def + ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec + ; GCN: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: S_NOP 0, implicit killed renamable $vgpr1 + ; GCN: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec + ; GCN: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: $exec = S_ANDN2_B64_term $exec, undef renamable $sgpr0_sgpr1, implicit-def $scc + ; GCN: S_ENDPGM 0 + %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + %2:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + S_NOP 0, implicit %0 + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + $exec = S_ANDN2_B64_term $exec, undef %4:sreg_64, implicit-def $scc + S_ENDPGM 0 +... +--- name: test_remat_v_mov_b32_e64 tracksRegLiveness: true body: | -- 2.7.4