From 35ea326047ef1220f26dc69593db9842a7dfeec1 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Thu, 14 Apr 2022 16:06:17 +0900 Subject: [PATCH] [AMDGPU] Try to avoid inserting duplicate s_inst_prefetch Check for existing s_inst_prefetch instructions when configuring prefetches during loop alignment. Reviewed By: rampitec, foad Differential Revision: https://reviews.llvm.org/D123569 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 18 +++++++++++------- llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll | 4 ---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index bb65557..0c344df 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12312,13 +12312,17 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { MachineBasicBlock *Exit = ML->getExitBlock(); if (Pre && Exit) { - BuildMI(*Pre, Pre->getFirstTerminator(), DebugLoc(), - TII->get(AMDGPU::S_INST_PREFETCH)) - .addImm(1); // prefetch 2 lines behind PC - - BuildMI(*Exit, Exit->getFirstNonDebugInstr(), DebugLoc(), - TII->get(AMDGPU::S_INST_PREFETCH)) - .addImm(2); // prefetch 1 line behind PC + auto PreTerm = Pre->getFirstTerminator(); + if (PreTerm == Pre->begin() || + std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH) + BuildMI(*Pre, PreTerm, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH)) + .addImm(1); // prefetch 2 lines behind PC + + auto ExitHead = Exit->getFirstNonDebugInstr(); + if (ExitHead == Exit->end() || + ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH) + BuildMI(*Exit, ExitHead, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH)) + .addImm(2); // prefetch 1 line behind PC } return CacheLineAlign; diff --git a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll index cbdb00e..26d0c05 100644 --- a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll +++ b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll @@ -11,8 +11,6 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) { ; GFX10-NEXT: s_mov_b32 s1, 0 ; GFX10-NEXT: ; implicit-def: $sgpr2 ; GFX10-NEXT: s_inst_prefetch 0x1 -; GFX10-NEXT: s_inst_prefetch 0x1 -; GFX10-NEXT: s_inst_prefetch 0x1 ; GFX10-NEXT: s_branch .LBB0_2 ; GFX10-NEXT: .p2align 6 ; GFX10-NEXT: .LBB0_1: ; %Flow @@ -50,8 +48,6 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) { ; GFX10-NEXT: s_branch .LBB0_1 ; GFX10-NEXT: .LBB0_4: ; %loop0_merge ; GFX10-NEXT: s_inst_prefetch 0x2 -; GFX10-NEXT: s_inst_prefetch 0x2 -; GFX10-NEXT: s_inst_prefetch 0x2 ; GFX10-NEXT: s_endpgm branch1_true: br label %2 -- 2.7.4