MachineBasicBlock *Exit = ML->getExitBlock();
if (Pre && Exit) {
- BuildMI(*Pre, Pre->getFirstTerminator(), DebugLoc(),
- TII->get(AMDGPU::S_INST_PREFETCH))
- .addImm(1); // prefetch 2 lines behind PC
-
- BuildMI(*Exit, Exit->getFirstNonDebugInstr(), DebugLoc(),
- TII->get(AMDGPU::S_INST_PREFETCH))
- .addImm(2); // prefetch 1 line behind PC
+ auto PreTerm = Pre->getFirstTerminator();
+ if (PreTerm == Pre->begin() ||
+ std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH)
+ BuildMI(*Pre, PreTerm, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
+ .addImm(1); // prefetch 2 lines behind PC
+
+ auto ExitHead = Exit->getFirstNonDebugInstr();
+ if (ExitHead == Exit->end() ||
+ ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH)
+ BuildMI(*Exit, ExitHead, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
+ .addImm(2); // prefetch 1 line behind PC
}
return CacheLineAlign;
; GFX10-NEXT: s_mov_b32 s1, 0
; GFX10-NEXT: ; implicit-def: $sgpr2
; GFX10-NEXT: s_inst_prefetch 0x1
-; GFX10-NEXT: s_inst_prefetch 0x1
-; GFX10-NEXT: s_inst_prefetch 0x1
; GFX10-NEXT: s_branch .LBB0_2
; GFX10-NEXT: .p2align 6
; GFX10-NEXT: .LBB0_1: ; %Flow
; GFX10-NEXT: s_branch .LBB0_1
; GFX10-NEXT: .LBB0_4: ; %loop0_merge
; GFX10-NEXT: s_inst_prefetch 0x2
-; GFX10-NEXT: s_inst_prefetch 0x2
-; GFX10-NEXT: s_inst_prefetch 0x2
; GFX10-NEXT: s_endpgm
branch1_true:
br label %2