Differential Revision: https://reviews.llvm.org/D24985
llvm-svn: 282875
/// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
+
+ /// \returns True if a waitcnt instruction is needed before a barrier
+ /// instruction, false otherwise. This implementation always returns true.
+ bool needWaitcntBeforeBarrier() const {
+ return true;
+ }
};
} // End namespace llvm
// S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
// but we also want to wait for any other outstanding transfers before
// signalling other hardware blocks
- if (I->getOpcode() == AMDGPU::S_BARRIER ||
- I->getOpcode() == AMDGPU::S_SENDMSG)
+ if ((I->getOpcode() == AMDGPU::S_BARRIER &&
+ ST->needWaitcntBeforeBarrier()) ||
+ I->getOpcode() == AMDGPU::S_SENDMSG)
Required = LastIssued;
else
Required = handleOperands(*I);