AMDGPU: Fix Two Address problems with v_movreld

author Nicolai Haehnle <nhaehnle@gmail.com>

Mon, 24 Oct 2016 14:56:02 +0000 (14:56 +0000)

committer Nicolai Haehnle <nhaehnle@gmail.com>

Mon, 24 Oct 2016 14:56:02 +0000 (14:56 +0000)
author Nicolai Haehnle <nhaehnle@gmail.com>
Mon, 24 Oct 2016 14:56:02 +0000 (14:56 +0000)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Mon, 24 Oct 2016 14:56:02 +0000 (14:56 +0000)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp

index fe7f292..39486ab 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1451,6 +1451,23 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
    return LoopBB;
  }
  
+static unsigned getMOVRELDPseudo(const TargetRegisterClass *VecRC) {
+  switch (VecRC->getSize()) {
+  case 4:
+    return AMDGPU::V_MOVRELD_B32_V1;
+  case 8:
+    return AMDGPU::V_MOVRELD_B32_V2;
+  case 16:
+    return AMDGPU::V_MOVRELD_B32_V4;
+  case 32:
+    return AMDGPU::V_MOVRELD_B32_V8;
+  case 64:
+    return AMDGPU::V_MOVRELD_B32_V16;
+  default:
+    llvm_unreachable("unsupported size for MOVRELD pseudos");
+  }
+}
+
  static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
                                            MachineBasicBlock &MBB,
                                            const SISubtarget &ST) {
@@ -1504,20 +1521,13 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
  
        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
      } else {
-      const MCInstrDesc &MovRelDesc = TII->get(AMDGPU::V_MOVRELD_B32_e32);
+      const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
  
-      MachineInstr *MovRel =
-        BuildMI(MBB, I, DL, MovRelDesc)
-        .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
-        .addOperand(*Val)
-        .addReg(Dst, RegState::ImplicitDefine)
-        .addReg(SrcVec->getReg(), RegState::Implicit);
-
-      const int ImpDefIdx = MovRelDesc.getNumOperands() +
-        MovRelDesc.getNumImplicitUses();
-      const int ImpUseIdx = ImpDefIdx + 1;
-
-      MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+      BuildMI(MBB, I, DL, MovRelDesc)
+          .addReg(Dst, RegState::Define)
+          .addReg(SrcVec->getReg())
+          .addOperand(*Val)
+          .addImm(SubReg - AMDGPU::sub0);
      }
  
      MI.eraseFromParent();
@@ -1555,20 +1565,13 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
        .addReg(PhiReg, RegState::Implicit)
        .addReg(AMDGPU::M0, RegState::Implicit);
    } else {
-    const MCInstrDesc &MovRelDesc = TII->get(AMDGPU::V_MOVRELD_B32_e32);
-    // vdst is not actually read and just provides the base register index.
-    MachineInstr *MovRel =
-      BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
-    .addReg(PhiReg, RegState::Undef, SubReg) // vdst
-    .addOperand(*Val)
-    .addReg(Dst, RegState::ImplicitDefine)
-    .addReg(PhiReg, RegState::Implicit);
-
-    const int ImpDefIdx = MovRelDesc.getNumOperands() +
-      MovRelDesc.getNumImplicitUses();
-    const int ImpUseIdx = ImpDefIdx + 1;
-
-    MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+    const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
+
+    BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
+        .addReg(Dst, RegState::Define)
+        .addReg(PhiReg)
+        .addOperand(*Val)
+        .addImm(SubReg - AMDGPU::sub0);
    }
  
    MI.eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

index dd9742b..fffdeb4 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -909,6 +909,32 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
      MI.eraseFromParent();
      break;
    }
+  case AMDGPU::V_MOVRELD_B32_V1:
+  case AMDGPU::V_MOVRELD_B32_V2:
+  case AMDGPU::V_MOVRELD_B32_V4:
+  case AMDGPU::V_MOVRELD_B32_V8:
+  case AMDGPU::V_MOVRELD_B32_V16: {
+    const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
+    unsigned VecReg = MI.getOperand(0).getReg();
+    bool IsUndef = MI.getOperand(1).isUndef();
+    unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
+    assert(VecReg == MI.getOperand(1).getReg());
+
+    MachineInstr *MovRel =
+        BuildMI(MBB, MI, DL, MovRelDesc)
+            .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
+            .addOperand(MI.getOperand(2))
+            .addReg(VecReg, RegState::ImplicitDefine)
+            .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
+
+    const int ImpDefIdx =
+        MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
+    const int ImpUseIdx = ImpDefIdx + 1;
+    MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+
+    MI.eraseFromParent();
+    break;
+  }
    case AMDGPU::SI_PC_ADD_REL_OFFSET: {
      MachineFunction &MF = *MBB.getParent();
      unsigned Reg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td

index 912f5ef..6124d4e 100644 (file)
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -538,6 +538,26 @@ def V_MOV_B32_indirect : VPseudoInstSI<(outs),
    let VOP1 = 1;
  }
  
+// This is a pseudo variant of the v_movreld_b32 instruction in which the
+// vector operand appears only twice, once as def and once as use. Using this
+// pseudo avoids problems with the Two Address instructions pass.
+class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
+  (outs rc:$vdst),
+  (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
+  let VOP1 = 1;
+
+  let Constraints = "$vsrc = $vdst";
+  let Uses = [M0, EXEC];
+
+  let SubtargetPredicate = HasMovrel;
+}
+
+def V_MOVRELD_B32_V1 : V_MOVRELD_B32_pseudo<VGPR_32>;
+def V_MOVRELD_B32_V2 : V_MOVRELD_B32_pseudo<VReg_64>;
+def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
+def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
+def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
+
  let Predicates = [isVI] in {
  
  def : Pat <
diff --git a/llvm/test/CodeGen/AMDGPU/movreld-bug.ll b/llvm/test/CodeGen/AMDGPU/movreld-bug.ll

new file mode 100644 (file)

index 0000000..0cc8a52
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/movreld-bug.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}main:
+; GCN: v_movreld_b32_e32 v0,
+; GCN: v_mov_b32_e32 v0, v1
+; GCN: ; return
+define amdgpu_ps float @main(i32 inreg %arg) #0 {
+main_body:
+  %tmp24 = insertelement <2 x float> undef, float 0.000000e+00, i32 %arg
+  %tmp25 = extractelement <2 x float> %tmp24, i32 1
+  ret float %tmp25
+}
+
+attributes #0 = { "InitialPSInputAddr"="36983" }
author	Nicolai Haehnle <nhaehnle@gmail.com>
	Mon, 24 Oct 2016 14:56:02 +0000 (14:56 +0000)
committer	Nicolai Haehnle <nhaehnle@gmail.com>
	Mon, 24 Oct 2016 14:56:02 +0000 (14:56 +0000)
llvm/lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/VOP1Instructions.td		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/movreld-bug.ll	[new file with mode: 0644]	patch \| blob