From 3d07a6d891f58c5ebb64e0ff63f27ca97493e6f4 Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Wed, 31 Mar 2021 16:45:21 +0100
Subject: [PATCH] [AMDGPU][GlobalISel] Add IMG init in selectImageIntrinsic

Doing this during instruction selection avoids the cost of running
SIAddIMGInit which is yet another pass over the MIR.

Differential Revision: https://reviews.llvm.org/D99670
---
 .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 32 ++++++++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp  |  4 ---
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d80e6c5..858a643 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1689,6 +1689,38 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
   if (BaseOpcode->HasD16)
     MIB.addImm(IsD16 ? -1 : 0);
 
+  if (IsTexFail) {
+    // An image load instruction with TFE/LWE only conditionally writes to its
+    // result registers. Initialize them to zero so that we always get well
+    // defined result values.
+    assert(VDataOut && !VDataIn);
+    Register Tied = MRI->cloneVirtualRegister(VDataOut);
+    Register Zero = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::V_MOV_B32_e32), Zero)
+      .addImm(0);
+    auto Parts = TRI.getRegSplitParts(MRI->getRegClass(Tied), 4);
+    if (STI.usePRTStrictNull()) {
+      // With enable-prt-strict-null enabled, initialize all result registers to
+      // zero.
+      auto RegSeq =
+          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
+      for (auto Sub : Parts)
+        RegSeq.addReg(Zero).addImm(Sub);
+    } else {
+      // With enable-prt-strict-null disabled, only initialize the extra TFE/LWE
+      // result register.
+      Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
+      auto RegSeq =
+          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
+      for (auto Sub : Parts.drop_back(1))
+        RegSeq.addReg(Undef).addImm(Sub);
+      RegSeq.addReg(Zero).addImm(Parts.back());
+    }
+    MIB.addReg(Tied, RegState::Implicit);
+    MIB->tieOperands(0, MIB->getNumOperands() - 1);
+  }
+
   MI.eraseFromParent();
   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index afa1e9d..6202eab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1133,10 +1133,6 @@ void GCNPassConfig::addPreGlobalInstructionSelect() {
 
 bool GCNPassConfig::addGlobalInstructionSelect() {
   addPass(new InstructionSelect(getOptLevel()));
-  // TODO: Fix instruction selection to do the right thing for image
-  // instructions with tfe or lwe in the first place, instead of running a
-  // separate pass to fix them up?
-  addPass(createSIAddIMGInitPass());
   return false;
 }
 
-- 
2.7.4