From: Saiyedul Islam
Date: Fri, 29 May 2020 14:16:07 +0000 (+0000)
Subject: [AMDGPU] Introduce Clang builtins to be mapped to AMDGCN atomic inc/dec intrinsics
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=675cefbf60270f59057972e33365a09590fb3694;p=platform%2Fupstream%2Fllvm.git

[AMDGPU] Introduce Clang builtins to be mapped to AMDGCN atomic inc/dec intrinsics

Summary:
  __builtin_amdgcn_atomic_inc32(int *Ptr, int Val, unsigned MemoryOrdering, const char *SyncScope)
  __builtin_amdgcn_atomic_inc64(int64_t *Ptr, int64_t Val, unsigned MemoryOrdering, const char *SyncScope)
  __builtin_amdgcn_atomic_dec32(int *Ptr, int Val, unsigned MemoryOrdering, const char *SyncScope)
  __builtin_amdgcn_atomic_dec64(int64_t *Ptr, int64_t Val, unsigned MemoryOrdering, const char *SyncScope)

The first and second arguments are passed transparently to the AMDGCN atomic
inc/dec intrinsic. The fifth argument of the intrinsic is set to true if the
first argument of the builtin is a volatile pointer.

The third argument of the builtin is one of the memory-ordering specifiers
ATOMIC_ACQUIRE, ATOMIC_RELEASE, ATOMIC_ACQ_REL, or ATOMIC_SEQ_CST, following
C++11 memory-model semantics. It is mapped to the corresponding LLVM atomic
memory ordering for the atomic inc/dec instruction using the Clang atomic C ABI.

The fourth argument is an AMDGPU-specific synchronization scope, given as a
string.

Reviewers: arsenm, sameerds, JonChesterfield, jdoerfert

Reviewed By: arsenm, sameerds

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, kerbowa, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D80804
---

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 2837914..9add10c 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -60,6 +60,12 @@ BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n")
 BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n")
 BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
 
+BUILTIN(__builtin_amdgcn_atomic_inc32, "ZiZiD*ZiUicC*", "n")
+BUILTIN(__builtin_amdgcn_atomic_inc64, "WiWiD*WiUicC*", "n")
+
+BUILTIN(__builtin_amdgcn_atomic_dec32, "ZiZiD*ZiUicC*", "n")
+BUILTIN(__builtin_amdgcn_atomic_dec64, "WiWiD*WiUicC*", "n")
+
 // FIXME: Need to disallow constant address space.
 BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
 BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bfc78ce..f0092e2 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14301,8 +14301,49 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
 }
 } // namespace
 
+// For processing memory ordering and memory scope arguments of various
+// amdgcn builtins.
+// \p Order takes a C++11 compatible memory-ordering specifier and converts
+// it into LLVM's memory ordering specifier using the atomic C ABI, and writes
+// it to \p AO. \p Scope takes a const char * and converts it into an AMDGCN
+// specific SyncScopeID and writes it to \p SSID.
+bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
+                                              llvm::AtomicOrdering &AO,
+                                              llvm::SyncScope::ID &SSID) {
+  if (isa<llvm::ConstantInt>(Order)) {
+    int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+
+    // Map C11/C++11 memory ordering to LLVM memory ordering
+    switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+    case llvm::AtomicOrderingCABI::acquire:
+      AO = llvm::AtomicOrdering::Acquire;
+      break;
+    case llvm::AtomicOrderingCABI::release:
+      AO = llvm::AtomicOrdering::Release;
+      break;
+    case llvm::AtomicOrderingCABI::acq_rel:
+      AO = llvm::AtomicOrdering::AcquireRelease;
+      break;
+    case llvm::AtomicOrderingCABI::seq_cst:
+      AO = llvm::AtomicOrdering::SequentiallyConsistent;
+      break;
+    case llvm::AtomicOrderingCABI::consume:
+    case llvm::AtomicOrderingCABI::relaxed:
+      break;
+    }
+
+    StringRef scp;
+    llvm::getConstantStringInfo(Scope, scp);
+    SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+    return true;
+  }
+  return false;
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
+  llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
+  llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
   case AMDGPU::BI__builtin_amdgcn_div_scale:
   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
@@ -14507,38 +14548,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   }
   case AMDGPU::BI__builtin_amdgcn_fence: {
-    llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
-    llvm::SyncScope::ID SSID;
-    Value *Order = EmitScalarExpr(E->getArg(0));
-    Value *Scope = EmitScalarExpr(E->getArg(1));
+    if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
+                                EmitScalarExpr(E->getArg(1)), AO, SSID))
+      return Builder.CreateFence(AO, SSID);
+    LLVM_FALLTHROUGH;
+  }
+  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+  case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
+    unsigned BuiltinAtomicOp;
+    llvm::Type *ResultType = ConvertType(E->getType());
 
-    if (isa<llvm::ConstantInt>(Order)) {
-      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+    switch (BuiltinID) {
+    case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+    case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+      BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+    case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+      BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
+      break;
+    }
 
-      // Map C11/C++11 memory ordering to LLVM memory ordering
-      switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
-      case llvm::AtomicOrderingCABI::acquire:
-        AO = llvm::AtomicOrdering::Acquire;
-        break;
-      case llvm::AtomicOrderingCABI::release:
-        AO = llvm::AtomicOrdering::Release;
-        break;
-      case llvm::AtomicOrderingCABI::acq_rel:
-        AO = llvm::AtomicOrdering::AcquireRelease;
-        break;
-      case llvm::AtomicOrderingCABI::seq_cst:
-        AO = llvm::AtomicOrdering::SequentiallyConsistent;
-        break;
-      case llvm::AtomicOrderingCABI::consume: // not supported by LLVM fence
-      case llvm::AtomicOrderingCABI::relaxed: // not supported by LLVM fence
-        break;
-      }
+    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
 
-      StringRef scp;
-      llvm::getConstantStringInfo(Scope, scp);
-      SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+    llvm::Function *F =
+        CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});
 
-      return Builder.CreateFence(AO, SSID);
+    if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
+                                EmitScalarExpr(E->getArg(3)), AO, SSID)) {
+
+      // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect ordering and
+      // scope as unsigned values
+      Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
+      Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
+
+      QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
+      bool Volatile =
+          PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
+      Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
+
+      return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
     }
     LLVM_FALLTHROUGH;
   }
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index c9fcaf9..48b2368 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3988,6 +3988,9 @@ public:
   llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E);
   llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
+                               llvm::AtomicOrdering &AO,
+                               llvm::SyncScope::ID &SSID);
 
 private:
   enum class MSVCIntrin;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 0e451d6..85126e0 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3070,41 +3070,56 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
 
 bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
                                           CallExpr *TheCall) {
+  // Position of memory order and scope arguments in the builtin.
+  unsigned OrderIndex, ScopeIndex;
   switch (BuiltinID) {
-  case AMDGPU::BI__builtin_amdgcn_fence: {
-    ExprResult Arg = TheCall->getArg(0);
-    auto ArgExpr = Arg.get();
-    Expr::EvalResult ArgResult;
-
-    if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
-      return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
-             << ArgExpr->getType();
-    int ord = ArgResult.Val.getInt().getZExtValue();
-
-    // Check valididty of memory ordering as per C11 / C++11's memody model.
-    switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
-    case llvm::AtomicOrderingCABI::acquire:
-    case llvm::AtomicOrderingCABI::release:
-    case llvm::AtomicOrderingCABI::acq_rel:
-    case llvm::AtomicOrderingCABI::seq_cst:
-      break;
-    default: {
-      return Diag(ArgExpr->getBeginLoc(),
-                  diag::warn_atomic_op_has_invalid_memory_order)
-             << ArgExpr->getSourceRange();
-    }
-    }
+  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+  case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+    OrderIndex = 2;
+    ScopeIndex = 3;
+    break;
+  case AMDGPU::BI__builtin_amdgcn_fence:
+    OrderIndex = 0;
+    ScopeIndex = 1;
+    break;
+  default:
+    return false;
+  }
 
-    Arg = TheCall->getArg(1);
-    ArgExpr = Arg.get();
-    Expr::EvalResult ArgResult1;
-    // Check that sync scope is a constant literal
-    if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
-                                         Context))
-      return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
-             << ArgExpr->getType();
-  } break;
+  ExprResult Arg = TheCall->getArg(OrderIndex);
+  auto ArgExpr = Arg.get();
+  Expr::EvalResult ArgResult;
+
+  if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
+    return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
+           << ArgExpr->getType();
+  int ord = ArgResult.Val.getInt().getZExtValue();
+
+  // Check validity of memory ordering as per C11 / C++11's memory model.
+  switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+  case llvm::AtomicOrderingCABI::acquire:
+  case llvm::AtomicOrderingCABI::release:
+  case llvm::AtomicOrderingCABI::acq_rel:
+  case llvm::AtomicOrderingCABI::seq_cst:
+    break;
+  default: {
+    return Diag(ArgExpr->getBeginLoc(),
+                diag::warn_atomic_op_has_invalid_memory_order)
+           << ArgExpr->getSourceRange();
+  }
   }
+
+  Arg = TheCall->getArg(ScopeIndex);
+  ArgExpr = Arg.get();
+  Expr::EvalResult ArgResult1;
+  // Check that sync scope is a constant literal
+  if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
+                                       Context))
+    return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
+           << ArgExpr->getType();
+
   return false;
 }
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
new file mode 100644
index 0000000..535c3d7
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
@@ -0,0 +1,253 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
+// RUN:   -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
+
+__attribute__((device)) void test_non_volatile_parameter32(int *ptr) {
+  // CHECK-LABEL: test_non_volatile_parameter32
+  int res;
+  // CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5)
+  // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32**
+  // CHECK-NEXT: %res = alloca i32, align 4, addrspace(5)
+  // CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32*
+  // CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %2 = load i32, i32* %1, align 4
+  // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4
+  res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %6 = load i32, i32* %5, align 4
+  // CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4
+  res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_non_volatile_parameter64(__INT64_TYPE__ *ptr) {
+  // CHECK-LABEL: test_non_volatile_parameter64
+  __INT64_TYPE__ res;
+  // CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5)
+  // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64**
+  // CHECK-NEXT: %res = alloca i64, align 8, addrspace(5)
+  // CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64*
+  // CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %2 = load i64, i64* %1, align 8
+  // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8
+  res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %6 = load i64, i64* %5, align 8
+  // CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8
+  res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_volatile_parameter32(volatile int *ptr) {
+  // CHECK-LABEL: test_volatile_parameter32
+  int res;
+  // CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5)
+  // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32**
+  // CHECK-NEXT: %res = alloca i32, align 4, addrspace(5)
+  // CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32*
+  // CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %2 = load volatile i32, i32* %1, align 4
+  // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 true)
+  // CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4
+  res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %6 = load volatile i32, i32* %5, align 4
+  // CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 true)
+  // CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4
+  res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_volatile_parameter64(volatile __INT64_TYPE__ *ptr) {
+  // CHECK-LABEL: test_volatile_parameter64
+  __INT64_TYPE__ res;
+  // CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5)
+  // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64**
+  // CHECK-NEXT: %res = alloca i64, align 8, addrspace(5)
+  // CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64*
+  // CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %2 = load volatile i64, i64* %1, align 8
+  // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 true)
+  // CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8
+  res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8
+  // CHECK-NEXT: %6 = load volatile i64, i64* %5, align 8
+  // CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 true)
+  // CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8
+  res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_shared32() {
+  // CHECK-LABEL: test_shared32
+  __attribute__((shared)) int val;
+
+  // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+  // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %0, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+  // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_shared64() {
+  // CHECK-LABEL: test_shared64
+  __attribute__((shared)) __INT64_TYPE__ val;
+
+  // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+  // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %0, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+  // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+int global_val32;
+__attribute__((device)) void test_global32() {
+  // CHECK-LABEL: test_global32
+  // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+  // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %0, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+  global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+  // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %2, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+  global_val32 = __builtin_amdgcn_atomic_dec32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__INT64_TYPE__ global_val64;
+__attribute__((device)) void test_global64() {
+  // CHECK-LABEL: test_global64
+  // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+  // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %0, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+  global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+  // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %2, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+  global_val64 = __builtin_amdgcn_atomic_dec64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((constant)) int cval32;
+__attribute__((device)) void test_constant32() {
+  // CHECK-LABEL: test_constant32
+  int local_val;
+
+  // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4
+  // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %0, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i32 %1, i32* %local_val.ascast, align 4
+  local_val = __builtin_amdgcn_atomic_inc32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4
+  // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %2, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i32 %3, i32* %local_val.ascast, align 4
+  local_val = __builtin_amdgcn_atomic_dec32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((constant)) __INT64_TYPE__ cval64;
+__attribute__((device)) void test_constant64() {
+  // CHECK-LABEL: test_constant64
+  __INT64_TYPE__ local_val;
+
+  // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8
+  // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %0, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i64 %1, i64* %local_val.ascast, align 8
+  local_val = __builtin_amdgcn_atomic_inc64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8
+  // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %2, i32 7, i32 2, i1 false)
+  // CHECK-NEXT: store i64 %3, i64* %local_val.ascast, align 8
+  local_val = __builtin_amdgcn_atomic_dec64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_order32() {
+  // CHECK-LABEL: test_order32
+  __attribute__((shared)) int val;
+
+  // CHECK: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %0, i32 4, i32 2, i1 false)
+  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, "workgroup");
+
+  // CHECK: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %2, i32 5, i32 2, i1 false)
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_RELEASE, "workgroup");
+
+  // CHECK: %5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %4, i32 6, i32 2, i1 false)
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQ_REL, "workgroup");
+
+  // CHECK: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %6, i32 7, i32 2, i1 false)
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_order64() {
+  // CHECK-LABEL: test_order64
+  __attribute__((shared)) __INT64_TYPE__ val;
+
+  // CHECK: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %0, i32 4, i32 2, i1 false)
+  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, "workgroup");
+
+  // CHECK: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %2, i32 5, i32 2, i1 false)
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_RELEASE, "workgroup");
+
+  // CHECK: %5 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %4, i32 6, i32 2, i1 false)
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQ_REL, "workgroup");
+
+  // CHECK: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %6, i32 7, i32 2, i1 false)
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_scope32() {
+  // CHECK-LABEL: test_scope32
+  __attribute__((shared)) int val;
+
+  // CHECK: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %0, i32 7, i32 1, i1 false)
+  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "");
+
+  // CHECK: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false)
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %4, i32 7, i32 3, i1 false)
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "agent");
+
+  // CHECK: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %6, i32 7, i32 4, i1 false)
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "wavefront");
+}
+
+__attribute__((device)) void test_scope64() {
+  // CHECK-LABEL: test_scope64
+  __attribute__((shared)) __INT64_TYPE__ val;
+
+  // CHECK: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %0, i32 7, i32 1, i1 false)
+  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "");
+
+  // CHECK: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false)
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: %5 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %4, i32 7, i32 3, i1 false)
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "agent");
+
+  // CHECK: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %6, i32 7, i32 4, i1 false)
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "wavefront");
+}
diff --git a/clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp b/clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
new file mode 100644
index 0000000..c08b00b
--- /dev/null
+++ b/clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
@@ -0,0 +1,18 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -x hip -fcuda-is-device -o - -emit-llvm -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_host() {
+  int val;
+
+  // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_inc32' in __host__ function
+  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "");
+
+  // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_inc64' in __host__ function
+  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "");
+
+  // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_dec32' in __host__ function
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "");
+
+  // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_dec64' in __host__ function
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "");
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
index dbe2900..e2d9082 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -144,3 +144,51 @@ void test_s_setreg(int x, int y) {
   __builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
   __builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
 }
+
+void test_atomic_inc32() {
+  int val = 17;
+  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  val = __builtin_amdgcn_atomic_inc32(4); // expected-error {{too few arguments to function call, expected 4}}
+  val = __builtin_amdgcn_atomic_inc32(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+  val = __builtin_amdgcn_atomic_inc32(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+  const char ptr[] = "workgroup";
+  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
+
+void test_atomic_inc64() {
+  __INT64_TYPE__ val = 17;
+  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  val = __builtin_amdgcn_atomic_inc64(4); // expected-error {{too few arguments to function call, expected 4}}
+  val = __builtin_amdgcn_atomic_inc64(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+  val = __builtin_amdgcn_atomic_inc64(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+  const char ptr[] = "workgroup";
+  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
+
+void test_atomic_dec32() {
+  int val = 17;
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  val = __builtin_amdgcn_atomic_dec32(4); // expected-error {{too few arguments to function call, expected 4}}
+  val = __builtin_amdgcn_atomic_dec32(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+  val = __builtin_amdgcn_atomic_dec32(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+  const char ptr[] = "workgroup";
+  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
+
+void test_atomic_dec64() {
+  __INT64_TYPE__ val = 17;
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  val = __builtin_amdgcn_atomic_dec64(4); // expected-error {{too few arguments to function call, expected 4}}
+  val = __builtin_amdgcn_atomic_dec64(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+  val = __builtin_amdgcn_atomic_dec64(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+  const char ptr[] = "workgroup";
+  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
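
For reference, a minimal device-side usage sketch of the new builtins (illustration only, not part of the patch; the function name sample_usage, its parameters, and the constant 42 are invented here, while the orderings and scope strings mirror the tests above):

  __attribute__((device)) void sample_usage(int *p32, __INT64_TYPE__ *p64) {
    // Pointer and value are forwarded to the intrinsic; the third argument is a
    // C++11 memory-ordering specifier and the fourth an AMDGPU sync-scope string.
    int r32 = __builtin_amdgcn_atomic_inc32(p32, 42, __ATOMIC_RELEASE, "workgroup");
    __INT64_TYPE__ r64 = __builtin_amdgcn_atomic_dec64(p64, 42, __ATOMIC_ACQUIRE, "agent");
    // Each builtin yields a value of the pointee type (the tests above store it back).
    (void)r32;
    (void)r64;
  }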