From 8c1ec1ef38e361c76a7f71f8078e21220619cca4 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 2 Mar 2016 18:28:53 +0000 Subject: [PATCH] [CUDA] Do not generate unnecessary runtime init code. Differential Revision: http://reviews.llvm.org/D17780 llvm-svn: 262499 --- clang/lib/CodeGen/CGCUDANV.cpp | 15 ++++++++++++++- clang/test/CodeGenCUDA/device-stub.cu | 20 ++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index f0ecb57..c678809 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -178,6 +178,10 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { + // No need to register anything + if (EmittedKernels.empty() && DeviceVars.empty()) + return nullptr; + llvm::Function *RegisterKernelsFunc = llvm::Function::Create( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule); @@ -251,6 +255,10 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { + // No need to generate ctors/dtors if there are no GPU binaries. + if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty()) + return nullptr; + // void __cuda_register_globals(void* handle); llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); // void ** __cudaRegisterFatBinary(void *); @@ -309,7 +317,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { CGM.getPointerAlign()); // Call __cuda_register_globals(GpuBinaryHandle); - CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); + if (RegisterGlobalsFunc) + CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); // Save GpuBinaryHandle so we can unregister it in destructor. GpuBinaryHandles.push_back(GpuBinaryHandle); @@ -329,6 +338,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { + // No need for destructor if we don't have handles to unregister. + if (GpuBinaryHandles.empty()) + return nullptr; + // void __cudaUnregisterFatBinary(void ** handle); llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index 81d23a2..93af5a3 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -1,7 +1,11 @@ // RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o - | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o - -DNOGLOBALS \ +// RUN: | FileCheck %s -check-prefix=NOGLOBALS +// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN #include "Inputs/cuda.h" +#ifndef NOGLOBALS // CHECK-DAG: @device_var = internal global i32 __device__ int device_var; @@ -65,6 +69,7 @@ __global__ void kernelfunc(int i, int j, int k) {} // CHECK: call{{.*}}cudaConfigureCall // CHECK: call{{.*}}kernelfunc void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } +#endif // Test that we've built a function to register kernels and global vars. // CHECK: define internal void @__cuda_register_globals @@ -89,3 +94,18 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // CHECK: load{{.*}}__cuda_gpubin_handle // CHECK-NEXT: call void @__cudaUnregisterFatBinary +// There should be no __cuda_register_globals if we have no +// device-side globals, but we still need to register GPU binary. +// Skip GPU binary string first. +// NOGLOBALS: @0 = private unnamed_addr constant{{.*}} +// NOGLOBALS-NOT: define internal void @__cuda_register_globals +// NOGLOBALS: define internal void @__cuda_module_ctor +// NOGLOBALS: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper +// NOGLOBALS-NOT: call void @__cuda_register_globals +// NOGLOBALS: define internal void @__cuda_module_dtor +// NOGLOBALS: call void @__cudaUnregisterFatBinary + +// There should be no constructors/destructors if we have no GPU binary. +// NOGPUBIN-NOT: define internal void @__cuda_register_globals +// NOGPUBIN-NOT: define internal void @__cuda_module_ctor +// NOGPUBIN-NOT: define internal void @__cuda_module_dtor -- 2.7.4