[CUDA] Implement atomicInc and atomicDec builtins
author: Justin Lebar <jlebar@google.com>
Tue, 22 Mar 2016 00:09:28 +0000 (00:09 +0000)
committer: Justin Lebar <jlebar@google.com>
Tue, 22 Mar 2016 00:09:28 +0000 (00:09 +0000)
These functions cannot be implemented as atomicrmw or cmpxchg
instructions, so they are implemented as calls to the NVVM intrinsics
@llvm.nvvm.atomic.load.inc.32.p0i32 and
@llvm.nvvm.atomic.load.dec.32.p0i32.

Patch by Jason Henline.

Reviewers: jlebar

Differential Revision: http://reviews.llvm.org/D18322

llvm-svn: 264009

clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtins-nvptx.c

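diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 858a62d..bebd78c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp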
@@ -7439,6 +7439,22 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(FnALAF32, {Ptr, Val});
   }
 
+  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
+    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    Value *FnALI32 =
+        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
+    return Builder.CreateCall(FnALI32, {Ptr, Val});
+  }
+
+  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
+    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    Value *FnALD32 =
+        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
+    return Builder.CreateCall(FnALD32, {Ptr, Val});
+  }
+
   default:
     return nullptr;
   }
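diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index 745e74f..1a6f889 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c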
@@ -189,7 +189,7 @@ __shared__ long long sll;
 
 // Check for atomic intrinsics
 // CHECK-LABEL: nvvm_atom
-__device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l,
+__device__ void nvvm_atom(float *fp, float f, int *ip, int i, unsigned int *uip, unsigned ui, long *lp, long l,
                           long long *llp, long long ll) {
   // CHECK: atomicrmw add
   __nvvm_atom_add_gen_i(ip, i);
@@ -272,5 +272,11 @@ __device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l,
   // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32
   __nvvm_atom_add_gen_f(fp, f);
 
+  // CHECK: call i32 @llvm.nvvm.atomic.load.inc.32.p0i32
+  __nvvm_atom_inc_gen_ui(uip, ui);
+
+  // CHECK: call i32 @llvm.nvvm.atomic.load.dec.32.p0i32
+  __nvvm_atom_dec_gen_ui(uip, ui);
+
   // CHECK: ret
 }