[AMDGPU] Try to avoid inserting duplicate s_inst_prefetch
author: Carl Ritson <carl.ritson@amd.com>
Thu, 14 Apr 2022 07:06:17 +0000 (16:06 +0900)
committer: Carl Ritson <carl.ritson@amd.com>
Thu, 14 Apr 2022 07:06:24 +0000 (16:06 +0900)
Check for existing s_inst_prefetch instructions when
configuring prefetches during loop alignment.

Reviewed By: rampitec, foad

Differential Revision: https://reviews.llvm.org/D123569

llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll

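diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index bb65557..0c344df 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp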
@@ -12312,13 +12312,17 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
   MachineBasicBlock *Exit = ML->getExitBlock();
 
   if (Pre && Exit) {
-    BuildMI(*Pre, Pre->getFirstTerminator(), DebugLoc(),
-            TII->get(AMDGPU::S_INST_PREFETCH))
-      .addImm(1); // prefetch 2 lines behind PC
-
-    BuildMI(*Exit, Exit->getFirstNonDebugInstr(), DebugLoc(),
-            TII->get(AMDGPU::S_INST_PREFETCH))
-      .addImm(2); // prefetch 1 line behind PC
+    auto PreTerm = Pre->getFirstTerminator();
+    if (PreTerm == Pre->begin() ||
+        std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH)
+      BuildMI(*Pre, PreTerm, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
+          .addImm(1); // prefetch 2 lines behind PC
+
+    auto ExitHead = Exit->getFirstNonDebugInstr();
+    if (ExitHead == Exit->end() ||
+        ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH)
+      BuildMI(*Exit, ExitHead, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
+          .addImm(2); // prefetch 1 line behind PC
   }
 
   return CacheLineAlign;
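diff --git a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
index cbdb00e..26d0c05 100644
--- a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll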
@@ -11,8 +11,6 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) {
 ; GFX10-NEXT:    s_mov_b32 s1, 0
 ; GFX10-NEXT:    ; implicit-def: $sgpr2
 ; GFX10-NEXT:    s_inst_prefetch 0x1
-; GFX10-NEXT:    s_inst_prefetch 0x1
-; GFX10-NEXT:    s_inst_prefetch 0x1
 ; GFX10-NEXT:    s_branch .LBB0_2
 ; GFX10-NEXT:    .p2align 6
 ; GFX10-NEXT:  .LBB0_1: ; %Flow
@@ -50,8 +48,6 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) {
 ; GFX10-NEXT:    s_branch .LBB0_1
 ; GFX10-NEXT:  .LBB0_4: ; %loop0_merge
 ; GFX10-NEXT:    s_inst_prefetch 0x2
-; GFX10-NEXT:    s_inst_prefetch 0x2
-; GFX10-NEXT:    s_inst_prefetch 0x2
 ; GFX10-NEXT:    s_endpgm
 branch1_true:
   br label %2