+ StringRef(RegName) + "\"."));
}
-MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
- MachineInstr * MI, MachineBasicBlock * BB) const {
-
+MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr *MI, MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
+ case AMDGPU::SI_INIT_M0: {
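+    // Expand the pseudo into a real s_mov_b32 that writes m0 directly,
+    // forwarding the pseudo's source operand, then erase the pseudo.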
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ BuildMI(*BB, MI->getIterator(), MI->getDebugLoc(),
+ TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addOperand(MI->getOperand(0));
+ MI->eraseFromParent();
+ break;
+ }
case AMDGPU::BRANCH:
return BB;
case AMDGPU::GET_GROUPSTATICSIZE: {
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
SDValue V) const {
+ // We can't use S_MOV_B32 directly, because there is no way to specify m0 as
+ // the destination register.
+ //
// We can't use CopyToReg, because MachineCSE won't combine COPY instructions,
// so we will end up with redundant moves to m0.
//
- // We can't use S_MOV_B32, because there is no way to specify m0 as the
- // destination register.
- //
- // We have to use them both. Machine cse will combine all the S_MOV_B32
- // instructions and the register coalescer eliminate the extra copies.
- SDNode *M0 = DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, V.getValueType(), V);
- return DAG.getCopyToReg(Chain, DL, DAG.getRegister(AMDGPU::M0, MVT::i32),
- SDValue(M0, 0), SDValue()); // Glue
- // A Null SDValue creates
- // a glue result.
+  // We use a pseudo to ensure we emit s_mov_b32 with m0 as the direct
+  // destination.
+
+  // Requesting MVT::Glue as a second result type gives the node a glue
+  // output, so uses of m0 can be glued directly to this def.
+ SDNode *M0 = DAG.getMachineNode(AMDGPU::SI_INIT_M0, DL, MVT::Other, MVT::Glue,
+ V, Chain);
+ return SDValue(M0, 0);
}
SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
} // End mayLoad = 1, mayStore = 1, hasSideEffects = 1
-let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+// Used as an isel pseudo to directly emit m0 initialization with an
+// s_mov_b32 rather than a copy of another initialized register.
+// MachineCSE skips copies, and we don't want to have to fold operands
+// before it runs.
+def SI_INIT_M0 : InstSI <
+ (outs),
+ (ins SSrc_32:$src), "", []> {
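+  // SSrc_32 lets the source be any scalar register or a 32-bit immediate.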
+ let Defs = [M0];
+ let usesCustomInserter = 1;
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
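+  // The expansion is a single SALU move, so tell later passes it is
+  // trivially cheap to duplicate rather than worth keeping live.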
+ let isAsCheapAsAMove = 1;
+ let SALU = 1;
+ let isReMaterializable = 1;
+}
+
+let Uses = [EXEC], Defs = [EXEC, VCC, M0] in {
class SI_INDIRECT_SRC<RegisterClass rc> : InstSI <
(outs VGPR_32:$dst, SReg_64:$temp),