[CUDA][HIP] Fix linkage for -fgpu-rdc

author Yaxun (Sam) Liu <yaxun.liu@amd.com>
Wed, 28 Oct 2020 14:44:21 +0000 (10:44 -0400)
committer Yaxun (Sam) Liu <yaxun.liu@amd.com>
Tue, 3 Nov 2020 13:07:19 +0000 (08:07 -0500)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp

index 9512b35..1efc39b 100644 (file)
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4483,13 +4483,16 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
    // and must all be equivalent. However, we are not allowed to
    // throw away these explicit instantiations.
    //
-  // We don't currently support CUDA device code spread out across multiple TUs,
+  // CUDA/HIP: For -fno-gpu-rdc case, device code is limited to one TU,
    // so say that CUDA templates are either external (for kernels) or internal.
-  // This lets llvm perform aggressive inter-procedural optimizations.
+  // This lets llvm perform aggressive inter-procedural optimizations. For
+  // -fgpu-rdc case, device function calls across multiple TUs are allowed,
+  // therefore we need to follow the normal linkage paradigm.
    if (Linkage == GVA_StrongODR) {
-    if (Context.getLangOpts().AppleKext)
+    if (getLangOpts().AppleKext)
        return llvm::Function::ExternalLinkage;
-    if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice)
+    if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+        !getLangOpts().GPURelocatableDeviceCode)
        return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage
                                            : llvm::Function::InternalLinkage;
      return llvm::Function::WeakODRLinkage;
diff --git a/clang/test/CodeGenCUDA/device-fun-linkage.cu b/clang/test/CodeGenCUDA/device-fun-linkage.cu

new file mode 100644 (file)

index 0000000..10b0f17
--- /dev/null
+++ b/clang/test/CodeGenCUDA/device-fun-linkage.cu
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
+// RUN:   -emit-llvm -o - %s \
+// RUN:   | FileCheck -check-prefix=NORDC %s
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
+// RUN:   -fgpu-rdc -emit-llvm -o - %s \
+// RUN:   | FileCheck -check-prefix=RDC %s
+
+#include "Inputs/cuda.h"
+
+// NORDC: define internal void @_Z4funcIiEvv()
+// NORDC: define void @_Z6kernelIiEvv()
+// RDC: define weak_odr void @_Z4funcIiEvv()
+// RDC: define weak_odr void @_Z6kernelIiEvv()
+
+template <typename T> __device__ void func() {}
+template <typename T> __global__ void kernel() {}
+
+template __device__ void func<int>();
+template __global__ void kernel<int>();
author	Yaxun (Sam) Liu <yaxun.liu@amd.com>
	Wed, 28 Oct 2020 14:44:21 +0000 (10:44 -0400)
committer	Yaxun (Sam) Liu <yaxun.liu@amd.com>
	Tue, 3 Nov 2020 13:07:19 +0000 (08:07 -0500)
clang/lib/CodeGen/CodeGenModule.cpp		patch | blob | history
clang/test/CodeGenCUDA/device-fun-linkage.cu	[new file with mode: 0644]	patch | blob