R600: Optimize and cleanup KILL on SI

author Tom Stellard <thomas.stellard@amd.com>

Fri, 18 Jan 2013 21:15:50 +0000 (21:15 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Fri, 18 Jan 2013 21:15:50 +0000 (21:15 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Fri, 18 Jan 2013 21:15:50 +0000 (21:15 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 18 Jan 2013 21:15:50 +0000 (21:15 +0000)
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp

index 4c672ca..18fa908 100644 (file)
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -131,9 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
    case AMDGPU::SI_INTERP_CONST:
      LowerSI_INTERP_CONST(MI, *BB, I, MRI);
      break;
-  case AMDGPU::SI_KIL:
-    LowerSI_KIL(MI, *BB, I, MRI);
-    break;
    case AMDGPU::SI_WQM:
      LowerSI_WQM(MI, *BB, I, MRI);
      break;
@@ -211,17 +208,6 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
    MI->eraseFromParent();
  }
  
-void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
-    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
-  // Clear this pixel from the exec mask if the operand is negative
-  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
-          AMDGPU::VCC)
-          .addReg(AMDGPU::SREG_LIT_0)
-          .addOperand(MI->getOperand(0));
-
-  MI->eraseFromParent();
-}
-
  void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
      MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
    unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h

index c088112..db36eef 100644 (file)
--- a/llvm/lib/Target/R600/SIISelLowering.h
+++ b/llvm/lib/Target/R600/SIISelLowering.h
@@ -34,8 +34,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
                MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
    void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
                MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
-  void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
-              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
    void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
                MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
    void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td

index 005be96..cac42da 100644 (file)
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -1080,13 +1080,6 @@ def SI_INTERP_CONST : InstSI <
                                                   imm:$attr, SReg_32:$params))]
  >;
  
-def SI_KIL : InstSI <
-  (outs),
-  (ins VReg_32:$src),
-  "SI_KIL $src",
-  [(int_AMDGPU_kill VReg_32:$src)]
->;
-
  def SI_WQM : InstSI <
    (outs),
    (ins),
@@ -1157,11 +1150,23 @@ def SI_END_CF : InstSI <
    [(int_SI_end_cf SReg_64:$saved)]
  >;
  
+def SI_KILL : InstSI <
+  (outs),
+  (ins VReg_32:$src),
+  "SI_KIL $src",
+  [(int_AMDGPU_kill VReg_32:$src)]
+>;
+
  } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
    // Uses = [EXEC], Defs = [EXEC]
  
  } // end IsCodeGenOnly, isPseudo
  
+def : Pat <
+  (int_AMDGPU_kilp),
+  (SI_KILL (V_MOV_IMM_I32 0xbf800000))
+>;
+
  /* int_SI_vs_load_input */
  def : Pat<
    (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
@@ -1315,11 +1320,6 @@ def : Pat<
  >;
  
  def : Pat <
-  (int_AMDGPU_kilp),
-  (SI_KIL (V_MOV_IMM_I32 0xbf800000))
->;
-
-def : Pat <
    (int_AMDGPU_cube VReg_128:$src),
    (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
      (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
diff --git a/llvm/lib/Target/R600/SILowerControlFlow.cpp b/llvm/lib/Target/R600/SILowerControlFlow.cpp

index 3fbe653..3780e40 100644 (file)
--- a/llvm/lib/Target/R600/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/R600/SILowerControlFlow.cpp
@@ -68,7 +68,10 @@ private:
    static char ID;
    const TargetInstrInfo *TII;
  
-  void Skip(MachineInstr &MI, MachineOperand &To);
+  bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
+
+  void Skip(MachineInstr &From, MachineOperand &To);
+  void SkipIfDead(MachineInstr &MI);
  
    void If(MachineInstr &MI);
    void Else(MachineInstr &MI);
@@ -78,6 +81,7 @@ private:
    void Loop(MachineInstr &MI);
    void EndCf(MachineInstr &MI);
  
+  void Kill(MachineInstr &MI);
    void Branch(MachineInstr &MI);
  
  public:
@@ -100,22 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
    return new SILowerControlFlowPass(tm);
  }
  
-void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
+                                        MachineBasicBlock *To) {
+
    unsigned NumInstr = 0;
  
-  for (MachineBasicBlock *MBB = *From.getParent()->succ_begin();
-       NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty();
+  for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
         MBB = *MBB->succ_begin()) {
  
      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
           NumInstr < SkipThreshold && I != E; ++I) {
  
        if (I->isBundle() || !I->isBundled())
-        ++NumInstr;
+        if (++NumInstr >= SkipThreshold)
+          return true;
      }
    }
  
-  if (NumInstr < SkipThreshold)
+  return false;
+}
+
+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+
+  if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
      return;
  
    DebugLoc DL = From.getDebugLoc();
@@ -124,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
            .addReg(AMDGPU::EXEC);
  }
  
+void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  DebugLoc DL = MI.getDebugLoc();
+
+  if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+    return;
+
+  MachineBasicBlock::iterator Insert = &MI;
+  ++Insert;
+
+  // If the exec mask is non-zero, skip the next two instructions
+  BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+          .addImm(3)
+          .addReg(AMDGPU::EXEC);
+
+  // Exec mask is zero: Export to NULL target...
+  BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
+          .addImm(0)
+          .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+          .addImm(0)
+          .addImm(1)
+          .addImm(1)
+          .addReg(AMDGPU::SREG_LIT_0)
+          .addReg(AMDGPU::SREG_LIT_0)
+          .addReg(AMDGPU::SREG_LIT_0)
+          .addReg(AMDGPU::SREG_LIT_0);
+
+  // ... and terminate wavefront
+  BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+}
+
  void SILowerControlFlowPass::If(MachineInstr &MI) {
    MachineBasicBlock &MBB = *MI.getParent();
    DebugLoc DL = MI.getDebugLoc();
@@ -242,8 +285,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) {
      assert(0);
  }
  
+void SILowerControlFlowPass::Kill(MachineInstr &MI) {
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  DebugLoc DL = MI.getDebugLoc();
+
+  // Kill is only allowed in pixel shaders
+  MachineFunction &MF = *MBB.getParent();
+  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  assert(Info->ShaderType == ShaderType::PIXEL);
+
+  // Clear this pixel from the exec mask if the operand is negative
+  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+          .addReg(AMDGPU::SREG_LIT_0)
+          .addOperand(MI.getOperand(0));
+
+  MI.eraseFromParent();
+}
+
  bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
-  bool HaveCf = false;
+
+  bool HaveKill = false;
+  unsigned Depth = 0;
  
    for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
         BI != BE; ++BI) {
@@ -257,6 +320,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
        switch (MI.getOpcode()) {
          default: break;
          case AMDGPU::SI_IF:
+          ++Depth;
            If(MI);
            break;
  
@@ -277,14 +341,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
            break;
  
          case AMDGPU::SI_LOOP:
+          ++Depth;
            Loop(MI);
            break;
  
          case AMDGPU::SI_END_CF:
-          HaveCf = true;
+          if (--Depth == 0 && HaveKill) {
+            SkipIfDead(MI);
+            HaveKill = false;
+          }
            EndCf(MI);
            break;
  
+        case AMDGPU::SI_KILL:
+          if (Depth == 0)
+            SkipIfDead(MI);
+          else
+            HaveKill = true;
+          Kill(MI);
+          break;
+
          case AMDGPU::S_BRANCH:
            Branch(MI);
            break;
@@ -292,40 +368,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
      }
    }
  
-  // TODO: What is this good for?
-  unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType;
-  if (HaveCf && ShaderType == ShaderType::PIXEL) {
-    for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
-         BI != BE; ++BI) {
-
-      MachineBasicBlock &MBB = *BI;
-      if (MBB.succ_empty()) {
-
-        MachineInstr &MI = *MBB.getFirstNonPHI();
-        DebugLoc DL = MI.getDebugLoc();
-
-        // If the exec mask is non-zero, skip the next two instructions
-        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
-               .addImm(3)
-               .addReg(AMDGPU::EXEC);
-
-        // Exec mask is zero: Export to NULL target...
-        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP))
-                .addImm(0)
-                .addImm(0x09) // V_008DFC_SQ_EXP_NULL
-                .addImm(0)
-                .addImm(1)
-                .addImm(1)
-                .addReg(AMDGPU::SREG_LIT_0)
-                .addReg(AMDGPU::SREG_LIT_0)
-                .addReg(AMDGPU::SREG_LIT_0)
-                .addReg(AMDGPU::SREG_LIT_0);
-
-        // ... and terminate wavefront
-        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM));
-      }
-    }
-  }
-
    return true;
  }
author	Tom Stellard <thomas.stellard@amd.com>
	Fri, 18 Jan 2013 21:15:50 +0000 (21:15 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Fri, 18 Jan 2013 21:15:50 +0000 (21:15 +0000)
llvm/lib/Target/R600/SIISelLowering.cpp		patch | blob | history
llvm/lib/Target/R600/SIISelLowering.h		patch | blob | history
llvm/lib/Target/R600/SIInstructions.td		patch | blob | history
llvm/lib/Target/R600/SILowerControlFlow.cpp		patch | blob | history