[AMDGPU] Move insertion of function entry waitcnt later

author Austin Kerbow <Austin.Kerbow@amd.com>

Tue, 27 Apr 2021 16:29:27 +0000 (09:29 -0700)

committer Austin Kerbow <Austin.Kerbow@amd.com>

Thu, 6 May 2021 00:58:38 +0000 (17:58 -0700)
author Austin Kerbow <Austin.Kerbow@amd.com>
Tue, 27 Apr 2021 16:29:27 +0000 (09:29 -0700)
committer Austin Kerbow <Austin.Kerbow@amd.com>
Thu, 6 May 2021 00:58:38 +0000 (17:58 -0700)
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

index 204e392..cead53a 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1600,6 +1600,28 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
  
    TrackedWaitcntSet.clear();
    BlockInfos.clear();
+  bool Modified = false;
+
+  if (!MFI->isEntryFunction()) {
+    // Wait for any outstanding memory operations that the input registers may
+    // depend on. We can't track them and it's better to do the wait after the
+    // costly call sequence.
+
+    // TODO: Could insert earlier and schedule more liberally with operations
+    // that only use caller preserved registers.
+    MachineBasicBlock &EntryBB = MF.front();
+    MachineBasicBlock::iterator I = EntryBB.begin();
+    for (MachineBasicBlock::iterator E = EntryBB.end();
+         I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
+      ;
+    BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
+    if (ST->hasVscnt())
+      BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
+          .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+          .addImm(0);
+
+    Modified = true;
+  }
  
    // Keep iterating over the blocks in reverse post order, inserting and
    // updating s_waitcnt where needed, until a fix point is reached.
@@ -1607,7 +1629,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
      BlockInfos.insert({MBB, BlockInfo(MBB)});
  
    std::unique_ptr<WaitcntBrackets> Brackets;
-  bool Modified = false;
    bool Repeat;
    do {
      Repeat = false;
@@ -1707,26 +1728,5 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
      }
    }
  
-  if (!MFI->isEntryFunction()) {
-    // Wait for any outstanding memory operations that the input registers may
-    // depend on. We can't track them and it's better to the wait after the
-    // costly call sequence.
-
-    // TODO: Could insert earlier and schedule more liberally with operations
-    // that only use caller preserved registers.
-    MachineBasicBlock &EntryBB = MF.front();
-    MachineBasicBlock::iterator I = EntryBB.begin();
-    for (MachineBasicBlock::iterator E = EntryBB.end();
-         I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
-      ;
-    BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
-    if (ST->hasVscnt())
-      BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
-          .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
-          .addImm(0);
-
-    Modified = true;
-  }
-
    return Modified;
  }
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir

index a79187a..0665f96 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
@@ -192,4 +192,17 @@ body:             |
      $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
      S_WAITCNT 3952
      KILL $vgpr0
+
+# Combine preexisting waitcnt with wait added to the start of a non-entry function.
+
+---
+name:            test_waitcnt_preexisting_func_start
+body:             |
+  bb.0:
+    ; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
+    ; GFX9: S_WAITCNT 0
+    ; GFX9-NOT: S_WAITCNT 0
+    ; GFX9: S_ENDPGM 0
+    S_WAITCNT 0
+    S_ENDPGM 0
  ...
author	Austin Kerbow <Austin.Kerbow@amd.com>
	Tue, 27 Apr 2021 16:29:27 +0000 (09:29 -0700)
committer	Austin Kerbow <Austin.Kerbow@amd.com>
	Thu, 6 May 2021 00:58:38 +0000 (17:58 -0700)
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp		patch | blob | history
llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir		patch | blob | history