R600/SI: Disable VMEM and SMEM clauses by breaking them with S_NOP

author Marek Olsak <marek.olsak@amd.com>

Sun, 7 Dec 2014 17:17:43 +0000 (17:17 +0000)

committer Marek Olsak <marek.olsak@amd.com>

Sun, 7 Dec 2014 17:17:43 +0000 (17:17 +0000)
author Marek Olsak <marek.olsak@amd.com>
Sun, 7 Dec 2014 17:17:43 +0000 (17:17 +0000)
committer Marek Olsak <marek.olsak@amd.com>
Sun, 7 Dec 2014 17:17:43 +0000 (17:17 +0000)
diff --git a/llvm/lib/Target/R600/SIInsertWaits.cpp b/llvm/lib/Target/R600/SIInsertWaits.cpp

index 712d97d..f279e5e 100644 (file)
--- a/llvm/lib/Target/R600/SIInsertWaits.cpp
+++ b/llvm/lib/Target/R600/SIInsertWaits.cpp
@@ -41,6 +41,12 @@ typedef union {
  
  } Counters;
  
+typedef enum {
+  OTHER,
+  SMEM,
+  VMEM
+} InstType;
+
  typedef Counters RegCounters[512];
  typedef std::pair<unsigned, unsigned> RegInterval;
  
@@ -73,6 +79,9 @@ private:
    /// \brief Different export instruction types seen since last wait.
    unsigned ExpInstrTypesSeen;
  
+  /// \brief Type of the last opcode.
+  InstType LastOpcodeType;
+
    /// \brief Get increment/decrement amount for this instruction.
    Counters getHwCounts(MachineInstr &MI);
  
@@ -83,7 +92,8 @@ private:
    RegInterval getRegInterval(MachineOperand &Op);
  
    /// \brief Handle instructions async components
-  void pushInstruction(MachineInstr &MI);
+  void pushInstruction(MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator I);
  
    /// \brief Insert the actual wait instruction
    bool insertWait(MachineBasicBlock &MBB,
@@ -203,10 +213,11 @@ RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
    return Result;
  }
  
-void SIInsertWaits::pushInstruction(MachineInstr &MI) {
+void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I) {
  
    // Get the hardware counter increments and sum them up
-  Counters Increment = getHwCounts(MI);
+  Counters Increment = getHwCounts(*I);
    unsigned Sum = 0;
  
    for (unsigned i = 0; i < 3; ++i) {
@@ -215,17 +226,42 @@ void SIInsertWaits::pushInstruction(MachineInstr &MI) {
    }
  
    // If we don't increase anything then that's it
-  if (Sum == 0)
+  if (Sum == 0) {
+    LastOpcodeType = OTHER;
      return;
+  }
+
+  if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+    // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM
+    // or SMEM clause, respectively.
+    //
+    // The temporary workaround is to break the clauses with S_NOP.
+    //
+    // The proper solution would be to allocate registers such that all source
+    // and destination registers don't overlap, e.g. this is illegal:
+    //   r0 = load r2
+    //   r2 = load r0
+    if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
+        (LastOpcodeType == VMEM && Increment.Named.VM)) {
+      // Insert a NOP to break the clause.
+      BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
+          .addImm(0);
+    }
+
+    if (TII->isSMRD(I->getOpcode()))
+      LastOpcodeType = SMEM;
+    else if (Increment.Named.VM)
+      LastOpcodeType = VMEM;
+  }
  
    // Remember which export instructions we have seen
    if (Increment.Named.EXP) {
-    ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
+    ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
    }
  
-  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
  
-    MachineOperand &Op = MI.getOperand(i);
+    MachineOperand &Op = I->getOperand(i);
      if (!isOpRelevant(Op))
        continue;
  
@@ -302,6 +338,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
                    ((Counts.Named.EXP & 0x7) << 4) |
                    ((Counts.Named.LGKM & 0x7) << 8));
  
+  LastOpcodeType = OTHER;
    return true;
  }
  
@@ -356,6 +393,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
  
    WaitedOn = ZeroCounts;
    LastIssued = ZeroCounts;
+  LastOpcodeType = OTHER;
  
    memset(&UsedRegs, 0, sizeof(UsedRegs));
    memset(&DefinedRegs, 0, sizeof(DefinedRegs));
@@ -368,7 +406,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
           I != E; ++I) {
  
        Changes |= insertWait(MBB, I, handleOperands(*I));
-      pushInstruction(*I);
+      pushInstruction(MBB, I);
      }
  
      // Wait for everything at the end of the MBB
author	Marek Olsak <marek.olsak@amd.com>
	Sun, 7 Dec 2014 17:17:43 +0000 (17:17 +0000)
committer	Marek Olsak <marek.olsak@amd.com>
	Sun, 7 Dec 2014 17:17:43 +0000 (17:17 +0000)