[AMDGPU][GlobalISel] Add IMG init in selectImageIntrinsic

author Jay Foad <jay.foad@amd.com>

Wed, 31 Mar 2021 15:45:21 +0000 (16:45 +0100)

committer Jay Foad <jay.foad@amd.com>

Thu, 1 Apr 2021 17:13:17 +0000 (18:13 +0100)
author Jay Foad <jay.foad@amd.com>
Wed, 31 Mar 2021 15:45:21 +0000 (16:45 +0100)
committer Jay Foad <jay.foad@amd.com>
Thu, 1 Apr 2021 17:13:17 +0000 (18:13 +0100)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

index d80e6c5..858a643 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1689,6 +1689,38 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
    if (BaseOpcode->HasD16)
      MIB.addImm(IsD16 ? -1 : 0);
  
+  if (IsTexFail) {
+    // An image load instruction with TFE/LWE only conditionally writes to its
+    // result registers. Initialize them to zero so that we always get well
+    // defined result values.
+    assert(VDataOut && !VDataIn);
+    Register Tied = MRI->cloneVirtualRegister(VDataOut);
+    Register Zero = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::V_MOV_B32_e32), Zero)
+      .addImm(0);
+    auto Parts = TRI.getRegSplitParts(MRI->getRegClass(Tied), 4);
+    if (STI.usePRTStrictNull()) {
+      // With enable-prt-strict-null enabled, initialize all result registers to
+      // zero.
+      auto RegSeq =
+          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
+      for (auto Sub : Parts)
+        RegSeq.addReg(Zero).addImm(Sub);
+    } else {
+      // With enable-prt-strict-null disabled, only initialize the extra TFE/LWE
+      // result register.
+      Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
+      auto RegSeq =
+          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
+      for (auto Sub : Parts.drop_back(1))
+        RegSeq.addReg(Undef).addImm(Sub);
+      RegSeq.addReg(Zero).addImm(Parts.back());
+    }
+    MIB.addReg(Tied, RegState::Implicit);
+    MIB->tieOperands(0, MIB->getNumOperands() - 1);
+  }
+
    MI.eraseFromParent();
    return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

index afa1e9d..6202eab 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1133,10 +1133,6 @@ void GCNPassConfig::addPreGlobalInstructionSelect() {
  
  bool GCNPassConfig::addGlobalInstructionSelect() {
    addPass(new InstructionSelect(getOptLevel()));
-  // TODO: Fix instruction selection to do the right thing for image
-  // instructions with tfe or lwe in the first place, instead of running a
-  // separate pass to fix them up?
-  addPass(createSIAddIMGInitPass());
    return false;
  }
author	Jay Foad <jay.foad@amd.com>
	Wed, 31 Mar 2021 15:45:21 +0000 (16:45 +0100)
committer	Jay Foad <jay.foad@amd.com>
	Thu, 1 Apr 2021 17:13:17 +0000 (18:13 +0100)
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp		patch \| blob \| history