From: Carl Ritson Date: Thu, 14 Apr 2022 07:06:17 +0000 (+0900) Subject: [AMDGPU] Try to avoid inserting duplicate s_inst_prefetch X-Git-Tag: upstream/15.0.7~10464 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=35ea326047ef1220f26dc69593db9842a7dfeec1;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Try to avoid inserting duplicate s_inst_prefetch Check for existing s_inst_prefetch instructions when configuring prefetches during loop alignment. Reviewed By: rampitec, foad Differential Revision: https://reviews.llvm.org/D123569 --- diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index bb65557..0c344df 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12312,13 +12312,17 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { MachineBasicBlock *Exit = ML->getExitBlock(); if (Pre && Exit) { - BuildMI(*Pre, Pre->getFirstTerminator(), DebugLoc(), - TII->get(AMDGPU::S_INST_PREFETCH)) - .addImm(1); // prefetch 2 lines behind PC - - BuildMI(*Exit, Exit->getFirstNonDebugInstr(), DebugLoc(), - TII->get(AMDGPU::S_INST_PREFETCH)) - .addImm(2); // prefetch 1 line behind PC + auto PreTerm = Pre->getFirstTerminator(); + if (PreTerm == Pre->begin() || + std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH) + BuildMI(*Pre, PreTerm, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH)) + .addImm(1); // prefetch 2 lines behind PC + + auto ExitHead = Exit->getFirstNonDebugInstr(); + if (ExitHead == Exit->end() || + ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH) + BuildMI(*Exit, ExitHead, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH)) + .addImm(2); // prefetch 1 line behind PC } return CacheLineAlign; diff --git a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll index cbdb00e..26d0c05 100644 --- a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll +++ b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll @@ -11,8 +11,6 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) { ; GFX10-NEXT: s_mov_b32 s1, 0 ; GFX10-NEXT: ; implicit-def: $sgpr2 ; GFX10-NEXT: s_inst_prefetch 0x1 -; GFX10-NEXT: s_inst_prefetch 0x1 -; GFX10-NEXT: s_inst_prefetch 0x1 ; GFX10-NEXT: s_branch .LBB0_2 ; GFX10-NEXT: .p2align 6 ; GFX10-NEXT: .LBB0_1: ; %Flow @@ -50,8 +48,6 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) { ; GFX10-NEXT: s_branch .LBB0_1 ; GFX10-NEXT: .LBB0_4: ; %loop0_merge ; GFX10-NEXT: s_inst_prefetch 0x2 -; GFX10-NEXT: s_inst_prefetch 0x2 -; GFX10-NEXT: s_inst_prefetch 0x2 ; GFX10-NEXT: s_endpgm branch1_true: br label %2