From df0560ca00182364e0a786d35adb294c3c98dbd0 Mon Sep 17 00:00:00 2001 From: Anshil Gandhi Date: Mon, 29 Nov 2021 12:06:52 -0700 Subject: [PATCH] [HIP] Add atomic load, atomic store and atomic cmpxchng_weak builtin support in HIP-clang Introduce `__hip_atomic_load`, `__hip_atomic_store` and `__hip_atomic_compare_exchange_weak` builtins in HIP. Reviewed By: yaxunl Differential Revision: https://reviews.llvm.org/D114553 --- clang/include/clang/AST/Expr.h | 8 +-- clang/include/clang/Basic/Builtins.def | 5 +- clang/lib/AST/Expr.cpp | 3 + clang/lib/AST/StmtPrinter.cpp | 3 +- clang/lib/CodeGen/CGAtomic.cpp | 14 +++++ clang/lib/Sema/SemaChecking.cpp | 14 ++++- clang/test/CodeGenCUDA/atomic-ops.cu | 106 ++++++++++++++++++++++++++------- clang/test/SemaCUDA/atomic-ops.cu | 86 ++++++++++++++++++++++++++ 8 files changed, 209 insertions(+), 30 deletions(-) create mode 100644 clang/test/SemaCUDA/atomic-ops.cu diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 246585e..2c63406 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -6308,6 +6308,7 @@ public: getOp() == AO__hip_atomic_compare_exchange_strong || getOp() == AO__opencl_atomic_compare_exchange_strong || getOp() == AO__opencl_atomic_compare_exchange_weak || + getOp() == AO__hip_atomic_compare_exchange_weak || getOp() == AO__atomic_compare_exchange || getOp() == AO__atomic_compare_exchange_n; } @@ -6342,10 +6343,9 @@ public: auto Kind = (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max) ? AtomicScopeModelKind::OpenCL - : (Op >= AO__hip_atomic_compare_exchange_strong && - Op <= AO__hip_atomic_fetch_max) - ? AtomicScopeModelKind::HIP - : AtomicScopeModelKind::None; + : (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max) + ? AtomicScopeModelKind::HIP + : AtomicScopeModelKind::None; return AtomicScopeModel::create(Kind); } diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 1f7680e..ad8b66a 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -855,8 +855,9 @@ ATOMIC_BUILTIN(__atomic_fetch_min, "v.", "t") ATOMIC_BUILTIN(__atomic_fetch_max, "v.", "t") // HIP atomic builtins. -// FIXME: Is `__hip_atomic_compare_exchange_n` or -// `__hip_atomic_compare_exchange_weak` needed? +ATOMIC_BUILTIN(__hip_atomic_load, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_store, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_compare_exchange_weak, "v.", "t") ATOMIC_BUILTIN(__hip_atomic_compare_exchange_strong, "v.", "t") ATOMIC_BUILTIN(__hip_atomic_exchange, "v.", "t") ATOMIC_BUILTIN(__hip_atomic_fetch_add, "v.", "t") diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index ce6e306..d3cb2ff 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -4681,6 +4681,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) { return 2; case AO__opencl_atomic_load: + case AO__hip_atomic_load: case AO__c11_atomic_store: case AO__c11_atomic_exchange: case AO__atomic_load: @@ -4721,6 +4722,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) { case AO__hip_atomic_fetch_min: case AO__hip_atomic_fetch_max: case AO__opencl_atomic_store: + case AO__hip_atomic_store: case AO__opencl_atomic_exchange: case AO__opencl_atomic_fetch_add: case AO__opencl_atomic_fetch_sub: @@ -4738,6 +4740,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) { case AO__hip_atomic_compare_exchange_strong: case AO__opencl_atomic_compare_exchange_strong: case AO__opencl_atomic_compare_exchange_weak: + case AO__hip_atomic_compare_exchange_weak: case AO__atomic_compare_exchange: case AO__atomic_compare_exchange_n: return 6; diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index fc267d7..b65a38d 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -1691,7 +1691,8 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) { PrintExpr(Node->getPtr()); if (Node->getOp() != AtomicExpr::AO__c11_atomic_load && Node->getOp() != AtomicExpr::AO__atomic_load_n && - Node->getOp() != AtomicExpr::AO__opencl_atomic_load) { + Node->getOp() != AtomicExpr::AO__opencl_atomic_load && + Node->getOp() != AtomicExpr::AO__hip_atomic_load) { OS << ", "; PrintExpr(Node->getVal1()); } diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 9b507b8..b68e632 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -531,6 +531,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, return; case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2, FailureOrder, Size, Order, Scope); return; @@ -566,6 +567,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, } case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_load: { llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr); @@ -577,6 +579,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: { llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1); @@ -846,6 +849,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load_n: break; @@ -867,6 +871,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: case AtomicExpr::AO__atomic_compare_exchange_n: case AtomicExpr::AO__atomic_compare_exchange: Val1 = EmitPointerWithAlignment(E->getVal1()); @@ -911,6 +916,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__atomic_store_n: @@ -1038,10 +1044,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_compare_exchange_strong: case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_store_n: @@ -1103,6 +1112,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__atomic_compare_exchange: @@ -1135,6 +1145,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // void __atomic_store_N(T *mem, T val, int order) case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: LibCallName = "__atomic_store"; @@ -1147,6 +1158,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_load_N(T *mem, int order) case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load: case AtomicExpr::AO__atomic_load_n: LibCallName = "__atomic_load"; @@ -1323,10 +1335,12 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store || E->getOp() == AtomicExpr::AO__opencl_atomic_store || + E->getOp() == AtomicExpr::AO__hip_atomic_store || E->getOp() == AtomicExpr::AO__atomic_store || E->getOp() == AtomicExpr::AO__atomic_store_n; bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load || E->getOp() == AtomicExpr::AO__opencl_atomic_load || + E->getOp() == AtomicExpr::AO__hip_atomic_load || E->getOp() == AtomicExpr::AO__atomic_load || E->getOp() == AtomicExpr::AO__atomic_load_n; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e1bb6f3..ee06425 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5297,6 +5297,7 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) { case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_load: return OrderingCABI != llvm::AtomicOrderingCABI::release && @@ -5304,6 +5305,7 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) { case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: return OrderingCABI != llvm::AtomicOrderingCABI::consume && @@ -5380,7 +5382,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, "need to update code for modified C11 atomics"); bool IsOpenCL = Op >= AtomicExpr::AO__opencl_atomic_init && Op <= AtomicExpr::AO__opencl_atomic_fetch_max; - bool IsHIP = Op >= AtomicExpr::AO__hip_atomic_compare_exchange_strong && + bool IsHIP = Op >= AtomicExpr::AO__hip_atomic_load && Op <= AtomicExpr::AO__hip_atomic_fetch_max; bool IsC11 = (Op >= AtomicExpr::AO__c11_atomic_init && Op <= AtomicExpr::AO__c11_atomic_fetch_min) || @@ -5399,6 +5401,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load_n: Form = Load; break; @@ -5409,6 +5412,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: Form = Copy; @@ -5474,6 +5478,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: Form = C11CmpXchg; break; @@ -5802,11 +5807,14 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, if ((Op == AtomicExpr::AO__c11_atomic_load || Op == AtomicExpr::AO__c11_atomic_store || Op == AtomicExpr::AO__opencl_atomic_load || - Op == AtomicExpr::AO__opencl_atomic_store ) && + Op == AtomicExpr::AO__hip_atomic_load || + Op == AtomicExpr::AO__opencl_atomic_store || + Op == AtomicExpr::AO__hip_atomic_store) && Context.AtomicUsesUnsupportedLibcall(AE)) Diag(AE->getBeginLoc(), diag::err_atomic_load_store_uses_lib) << ((Op == AtomicExpr::AO__c11_atomic_load || - Op == AtomicExpr::AO__opencl_atomic_load) + Op == AtomicExpr::AO__opencl_atomic_load || + Op == AtomicExpr::AO__hip_atomic_load) ? 0 : 1); diff --git a/clang/test/CodeGenCUDA/atomic-ops.cu b/clang/test/CodeGenCUDA/atomic-ops.cu index 107f944..3ba7e32 100644 --- a/clang/test/CodeGenCUDA/atomic-ops.cu +++ b/clang/test/CodeGenCUDA/atomic-ops.cu @@ -3,6 +3,7 @@ // CHECK-LABEL: @_Z24atomic32_op_singlethreadPiii // CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 4 // CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") // CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") // CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") @@ -10,8 +11,11 @@ // CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") // CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") // CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: load atomic i32, i32* {{%[0-9]+}} syncscope("singlethread-one-as") monotonic, align 4 +// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("singlethread-one-as") monotonic, align 4 __device__ int atomic32_op_singlethread(int *ptr, int val, int desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); @@ -19,6 +23,8 @@ __device__ int atomic32_op_singlethread(int *ptr, int val, int desired) { val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); return flag ? val : desired; } @@ -33,6 +39,7 @@ __device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned in // CHECK-LABEL: @_Z21atomic32_op_wavefrontPiii // CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 4 // CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") // CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") // CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") @@ -40,8 +47,11 @@ __device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned in // CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") // CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") // CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: load atomic i32, i32* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 4 +// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("wavefront-one-as") monotonic, align 4 __device__ int atomic32_op_wavefront(int *ptr, int val, int desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); @@ -49,6 +59,8 @@ __device__ int atomic32_op_wavefront(int *ptr, int val, int desired) { val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); return flag ? val : desired; } @@ -63,6 +75,7 @@ __device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int v // CHECK-LABEL: @_Z21atomic32_op_workgroupPiii // CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 4 // CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") // CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") // CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") @@ -70,8 +83,10 @@ __device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int v // CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") // CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") // CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("workgroup-one-as") monotonic, align 4 __device__ int atomic32_op_workgroup(int *ptr, int val, int desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); @@ -79,6 +94,7 @@ __device__ int atomic32_op_workgroup(int *ptr, int val, int desired) { val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); return flag ? val : desired; } @@ -93,6 +109,7 @@ __device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int v // CHECK-LABEL: @_Z17atomic32_op_agentPiii // CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 4 // CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") // CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") // CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") @@ -100,8 +117,10 @@ __device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int v // CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") // CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") // CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("agent-one-as") monotonic, align 4 __device__ int atomic32_op_agent(int *ptr, int val, int desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); @@ -109,6 +128,7 @@ __device__ int atomic32_op_agent(int *ptr, int val, int desired) { val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); return flag ? val : desired; } @@ -123,6 +143,7 @@ __device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, // CHECK-LABEL: @_Z18atomic32_op_systemPiii // CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 4 // CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") // CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") // CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") @@ -130,8 +151,11 @@ __device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, // CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") // CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") // CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: load i32, i32* %{{.*}}, align 4 +// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("one-as") monotonic, align 4 __device__ int atomic32_op_system(int *ptr, int val, int desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); @@ -139,6 +163,8 @@ __device__ int atomic32_op_system(int *ptr, int val, int desired) { val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); return flag ? val : desired; } @@ -151,8 +177,9 @@ __device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, return val; } -// CHECK-LABEL: @_Z24atomic64_op_singlethreadPxxx +// CHECK-LABEL: @_Z24atomic64_op_singlethreadPxS_xx // CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 8 // CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") // CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") // CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") @@ -160,8 +187,10 @@ __device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, // CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") // CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") // CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -__device__ long long atomic64_op_singlethread(long long *ptr, long long val, long long desired) { +// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8 +__device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, long long val, long long desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); @@ -169,20 +198,26 @@ __device__ long long atomic64_op_singlethread(long long *ptr, long long val, lon val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); return flag ? val : desired; } -// CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyyy +// CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyS_yy // CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") // CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") -__device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long val, unsigned long long desired) { +// CHECK: load atomic i64, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8 +// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8 +__device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); return val; } -// CHECK-LABEL: @_Z21atomic64_op_wavefrontPxxx +// CHECK-LABEL: @_Z21atomic64_op_wavefrontPxS_xx // CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 8 // CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") // CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") // CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") @@ -190,8 +225,11 @@ __device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, // CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") // CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") // CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -__device__ long long atomic64_op_wavefront(long long *ptr, long long val, long long desired) { +// CHECK: load atomic i64, i64* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8 +// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("wavefront-one-as") monotonic, align 8 +__device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long long val, long long desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); @@ -199,20 +237,27 @@ __device__ long long atomic64_op_wavefront(long long *ptr, long long val, long l val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); return flag ? val : desired; } -// CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyyy +// CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyS_yy // CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") // CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") -__device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long val, unsigned long long desired) { +// CHECK: load atomic i64, i64* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8 +// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("wavefront-one-as") monotonic, align 8 +__device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); return val; } -// CHECK-LABEL: @_Z21atomic64_op_workgroupPxxx +// CHECK-LABEL: @_Z21atomic64_op_workgroupPxS_xx // CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 8 // CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") // CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") // CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") @@ -220,8 +265,10 @@ __device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, un // CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") // CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") // CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -__device__ long long atomic64_op_workgroup(long long *ptr, long long val, long long desired) { +// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("workgroup-one-as") monotonic, align 8 +__device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long long val, long long desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); @@ -229,20 +276,24 @@ __device__ long long atomic64_op_workgroup(long long *ptr, long long val, long l val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); return flag ? val : desired; } -// CHECK-LABEL: @_Z22atomicu64_op_workgroupPyyy +// CHECK-LABEL: @_Z22atomicu64_op_workgroupPyS_yy // CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") // CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") -__device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long val, unsigned long long desired) { +// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("workgroup-one-as") monotonic, align 8 +__device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); return val; } -// CHECK-LABEL: @_Z17atomic64_op_agentPxxx +// CHECK-LABEL: @_Z17atomic64_op_agentPxS_xx // CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 8 // CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") // CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") // CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") @@ -250,8 +301,10 @@ __device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, un // CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") // CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") // CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -__device__ long long atomic64_op_agent(long long *ptr, long long val, long long desired) { +// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("agent-one-as") monotonic, align 8 +__device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long long val, long long desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); @@ -259,20 +312,24 @@ __device__ long long atomic64_op_agent(long long *ptr, long long val, long long val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); return flag ? val : desired; } -// CHECK-LABEL: @_Z18atomicu64_op_agentPyyy +// CHECK-LABEL: @_Z18atomicu64_op_agentPyS_yy // CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") // CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") -__device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long val, unsigned long long desired) { +// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("agent-one-as") monotonic, align 8 +__device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); return val; } -// CHECK-LABEL: @_Z18atomic64_op_systemPxxx +// CHECK-LABEL: @_Z18atomic64_op_systemPxS_xx // CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 8 // CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") // CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") // CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") @@ -280,8 +337,11 @@ __device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsign // CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") // CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") // CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -__device__ long long atomic64_op_system(long long *ptr, long long val, long long desired) { +// CHECK: load i64, i64* %{{.*}}, align 8 +// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("one-as") monotonic, align 8 +__device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long long val, long long desired) { bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); @@ -289,14 +349,20 @@ __device__ long long atomic64_op_system(long long *ptr, long long val, long long val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); return flag ? val : desired; } -// CHECK-LABEL: @_Z19atomicu64_op_systemPyyy +// CHECK-LABEL: @_Z19atomicu64_op_systemPyS_yy // CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") // CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") -__device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long val, unsigned long long desired) { +// CHECK: load i64, i64* %{{.*}}, align 8 +// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("one-as") monotonic, align 8 +__device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) { val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); return val; } diff --git a/clang/test/SemaCUDA/atomic-ops.cu b/clang/test/SemaCUDA/atomic-ops.cu new file mode 100644 index 0000000..af93b7e --- /dev/null +++ b/clang/test/SemaCUDA/atomic-ops.cu @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -x hip -std=c++11 -triple amdgcn -fcuda-is-device -verify -fsyntax-only %s + +#include "Inputs/cuda.h" + +__device__ int test_hip_atomic_load(int *pi32, unsigned int *pu32, long long *pll, unsigned long long *pull, float *fp, double *dbl) { + int val = __hip_atomic_load(0); // expected-error {{too few arguments to function call, expected 3, have 1}} + val = __hip_atomic_load(0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}} + val = __hip_atomic_load(0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}} + val = __hip_atomic_load(pi32, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}} + val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, 6); // expected-error {{synchronization scope argument to atomic operation is invalid}} + val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(pi32, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(pi32, __ATOMIC_CONSUME, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(pi32, __ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(pi32, __ATOMIC_ACQ_REL, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}} + val = __hip_atomic_load(pu32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(pll, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(pull, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(fp, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_load(dbl, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + return val; +} + +__device__ int test_hip_atomic_store(int *pi32, unsigned int *pu32, long long *pll, unsigned long long *pull, float *fp, double *dbl, + int i32, unsigned int u32, long long i64, unsigned long long u64, float f32, double f64) { + __hip_atomic_store(0); // expected-error {{too few arguments to function call, expected 4, have 1}} + __hip_atomic_store(0, 0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}} + __hip_atomic_store(0, 0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}} + __hip_atomic_store(pi32, 0, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}} + __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, 6); // expected-error {{synchronization scope argument to atomic operation is invalid}} + __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pi32, 0, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pi32, 0, __ATOMIC_CONSUME, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}} + __hip_atomic_store(pi32, 0, __ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}} + __hip_atomic_store(pi32, 0, __ATOMIC_ACQ_REL, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}} + __hip_atomic_store(pi32, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pi32, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pu32, u32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pll, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pull, u64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(fp, f32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(dbl, f64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pi32, u32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pi32, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pi32, u64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(pll, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(fp, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(fp, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(dbl, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + __hip_atomic_store(dbl, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + return 0; +} + +__device__ bool test_hip_atomic_cmpxchg_weak(int *ptr, int val, int desired) { + bool flag = __hip_atomic_compare_exchange_weak(0); // expected-error {{too few arguments to function call, expected 6, have 1}} + flag = __hip_atomic_compare_exchange_weak(0, 0, 0, 0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 6, have 7}} + flag = __hip_atomic_compare_exchange_weak(0, 0, 0, 0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}} + flag = __hip_atomic_compare_exchange_weak(ptr, 0, 0, 0, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}}, expected-warning {{null passed to a callee that requires a non-null argument}} + flag = __hip_atomic_compare_exchange_weak(ptr, 0, 0, 0, 0, __HIP_MEMORY_SCOPE_SYSTEM); // expected-warning {{null passed to a callee that requires a non-null argument}} + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_CONSUME, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_CONSUME, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_ACQ_REL, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_CONSUME, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + return flag; +} -- 2.7.4