#include "target_impl.h"
-// inc requires an amdgcn specific intrinsic which is not yet available
-DEVICE unsigned atomicInc(unsigned *address);
-DEVICE unsigned atomicInc(unsigned *address, unsigned max);
-DEVICE int atomicInc(int *address);
-
namespace {
template <typename T> DEVICE T atomicAdd(T *address, T val) {
return compare;
}
+INLINE uint32_t atomicInc(uint32_t *address, uint32_t max) { // Atomic increment of *address via the amdgcn inc32 builtin; returns the builtin's result.
+ return __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, ""); // seq_cst ordering, empty sync-scope string. NOTE(review): presumably wraps to 0 once the old value reaches max (CUDA atomicInc semantics) - confirm.
+}
+
} // namespace
#endif
__builtin_amdgcn_s_barrier();
}
-DEVICE void __kmpc_impl_threadfence(void);
-DEVICE void __kmpc_impl_threadfence_block(void);
-DEVICE void __kmpc_impl_threadfence_system(void);
+INLINE void __kmpc_impl_threadfence() { // Issues a sequentially consistent memory fence at the "agent" sync scope.
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
+}
+
+INLINE void __kmpc_impl_threadfence_block() { // Issues a sequentially consistent memory fence at the "workgroup" sync scope.
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+}
+
+INLINE void __kmpc_impl_threadfence_system() { // Issues a sequentially consistent memory fence with an empty sync-scope string.
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); // NOTE(review): "" presumably selects the widest (system) scope, matching the function name - confirm against the AMDGPU memory model docs.
+}
// Calls to the AMDGCN layer (assuming 1D layout)
INLINE int GetThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); } // Returns the x-dimension work-item id reported by the amdgcn builtin.