From b952d799cacdb7efd44c1c9468bb11471cc16874 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Fri, 3 Apr 2020 10:17:06 -0400 Subject: [PATCH] [cuda][hip] Fix `RegisterVar` function prototype. Summary: - `RegisterVar` has `void` return type and `size_t` in its variable size parameter in HIP or CUDA 9.0+. Reviewers: tra, yaxunl Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D77398 --- clang/include/clang/Basic/Cuda.h | 1 + clang/lib/Basic/Cuda.cpp | 2 +- clang/lib/CodeGen/CGCUDANV.cpp | 12 +++++++++--- clang/test/CodeGenCUDA/device-stub.cu | 8 ++++---- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index da57295..c2ebf87 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -117,6 +117,7 @@ enum class CudaFeature { CUDA_USES_FATBIN_REGISTER_END, }; +CudaVersion ToCudaVersion(llvm::VersionTuple); bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature); bool CudaFeatureEnabled(CudaVersion, CudaFeature); diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index e06d120..74eb547 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -362,7 +362,7 @@ CudaVersion MaxVersionForCudaArch(CudaArch A) { } } -static CudaVersion ToCudaVersion(llvm::VersionTuple Version) { +CudaVersion ToCudaVersion(llvm::VersionTuple Version) { int IVer = Version.getMajor() * 10 + Version.getMinor().getValueOr(0); switch(IVer) { diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 6d92ef3..351c505 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -440,13 +440,19 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { Builder.CreateCall(RegisterFunc, Args); } + llvm::Type *VarSizeTy = IntTy; + // For HIP or CUDA 9.0+, device variable size is type of `size_t`. + if (CGM.getLangOpts().HIP || + ToCudaVersion(CGM.getTarget().getSDKVersion()) >= CudaVersion::CUDA_90) + VarSizeTy = SizeTy; + // void __cudaRegisterVar(void **, char *, char *, const char *, // int, int, int, int) llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy, - CharPtrTy, IntTy, IntTy, + CharPtrTy, IntTy, VarSizeTy, IntTy, IntTy}; llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, RegisterVarParams, false), + llvm::FunctionType::get(VoidTy, RegisterVarParams, false), addUnderscoredPrefixToName("RegisterVar")); // void __cudaRegisterSurface(void **, const struct surfaceReference *, // const void **, const char *, int, int); @@ -476,7 +482,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { VarName, VarName, llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()), - llvm::ConstantInt::get(IntTy, VarSize), + llvm::ConstantInt::get(VarSizeTy, VarSize), llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()), llvm::ConstantInt::get(IntTy, 0)}; Builder.CreateCall(RegisterVar, Args); diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index 9db5738..0f4a564 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -181,10 +181,10 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // Test that we've built a function to register kernels and global vars. // ALL: define internal void @__[[PREFIX]]_register_globals // ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc{{[^,]*}}, {{[^@]*}}@0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{[^,]*}}, {{[^@]*}}@1, {{.*}}i32 0, i32 4, i32 0, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, i32 4, i32 1, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, i32 4, i32 0, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, i32 4, i32 1, i32 0 +// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{[^,]*}}, {{[^@]*}}@1, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0 +// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0 +// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0 +// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0 // ALL: ret void // Test that we've built a constructor. -- 2.7.4