AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies

author Tom Stellard <thomas.stellard@amd.com>

Tue, 6 Sep 2016 20:00:26 +0000 (20:00 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Tue, 6 Sep 2016 20:00:26 +0000 (20:00 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Tue, 6 Sep 2016 20:00:26 +0000 (20:00 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Tue, 6 Sep 2016 20:00:26 +0000 (20:00 +0000)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

index 56daea6..58747af 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1184,14 +1184,39 @@ static void removeModOperands(MachineInstr &MI) {
    MI.RemoveOperand(Src0ModIdx);
  }
  
-// TODO: Maybe this should be removed this and custom fold everything in
-// SIFoldOperands?
  bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                  unsigned Reg, MachineRegisterInfo *MRI) const {
    if (!MRI->hasOneNonDBGUse(Reg))
      return false;
  
    unsigned Opc = UseMI.getOpcode();
+  if (Opc == AMDGPU::COPY) {
+    bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
+    switch (DefMI.getOpcode()) {
+    default:
+      return false;
+    case AMDGPU::S_MOV_B64:
+      // TODO: We could fold 64-bit immediates, but this gets complicated
+      // when there are sub-registers.
+      return false;
+
+    case AMDGPU::V_MOV_B32_e32:
+    case AMDGPU::S_MOV_B32:
+      break;
+    }
+    unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
+    const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
+    assert(ImmOp);
+    // FIXME: We could handle FrameIndex values here.
+    if (!ImmOp->isImm()) {
+      return false;
+    }
+    UseMI.setDesc(get(NewOpc));
+    UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
+    UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
+    return true;
+  }
+
    if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
      // Don't fold if we are using source modifiers. The new VOP2 instructions
      // don't have them.
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td

index 14d6daa..460bb4d 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -25,6 +25,7 @@ class SOP1_Pseudo <string opName, dag outs, dag ins,
    let SALU = 1;
    let SOP1 = 1;
    let SchedRW = [WriteSALU];
+  let UseNamedOperandTable = 1;
  
    string Mnemonic = opName;
    string AsmOperands = asmOps;
@@ -1100,4 +1101,4 @@ def S_GETREG_B32_vi        : SOPK_Real_vi <0x11, S_GETREG_B32>;
  def S_SETREG_B32_vi        : SOPK_Real_vi <0x12, S_SETREG_B32>;
  //def S_GETREG_REGRD_B32_vi  : SOPK_Real_vi <0x13, S_GETREG_REGRD_B32>; // see pseudo for comments
  def S_SETREG_IMM32_B32_vi  : SOPK_Real64<0x14, S_SETREG_IMM32_B32>,
-                             Select_vi<S_SETREG_IMM32_B32.Mnemonic>;
-\ No newline at end of file
+                             Select_vi<S_SETREG_IMM32_B32.Mnemonic>;
author	Tom Stellard <thomas.stellard@amd.com>
	Tue, 6 Sep 2016 20:00:26 +0000 (20:00 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Tue, 6 Sep 2016 20:00:26 +0000 (20:00 +0000)
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SOPInstructions.td		patch \| blob \| history