From: Yaxun (Sam) Liu Date: Tue, 11 May 2021 14:09:38 +0000 (-0400) Subject: [CUDA][HIP] Fix device template variables X-Git-Tag: llvmorg-14-init~6915 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=98575708da9544ccab8939fece9c3d638a32f09f;p=platform%2Fupstream%2Fllvm.git [CUDA][HIP] Fix device template variables Currently clang does not emit device template variables instantiated only in host functions, however, nvcc is able to do that: https://godbolt.org/z/fneEfferY This patch fixes this issue by refactoring and extending the existing mechanism for emitting static device var ODR-used by host only. Basically clang records device variables ODR-used by host code and force them to be emitted in device compilation. The existing mechanism makes sure these device variables ODR-used by host code are added to llvm.compiler-used, therefore they are guaranteed not to be deleted. It also fixes non-ODR-use of static device variable by host code causing static device variable to be emitted and registered, which should not. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D102237 --- diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index bef7938..6ebdca0 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1064,8 +1064,8 @@ public: // Implicitly-declared type 'struct _GUID'. mutable TagDecl *MSGuidTagDecl = nullptr; - /// Keep track of CUDA/HIP static device variables referenced by host code. - llvm::DenseSet CUDAStaticDeviceVarReferencedByHost; + /// Keep track of CUDA/HIP device-side variables ODR-used by host code. + llvm::DenseSet CUDADeviceVarODRUsedByHost; ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents, SelectorTable &sels, Builtin::Context &builtins); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 8941d56..6eb8da7 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11635,7 +11635,7 @@ bool ASTContext::mayExternalizeStaticVar(const Decl *D) const { bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const { return mayExternalizeStaticVar(D) && (D->hasAttr() || - CUDAStaticDeviceVarReferencedByHost.count(cast(D))); + CUDADeviceVarODRUsedByHost.count(cast(D))); } StringRef ASTContext::getCUIDHash() const { diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 1cd718d..995b6a0 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -1015,10 +1015,14 @@ void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D, // Don't register a C++17 inline variable. The local symbol can be // discarded and referencing a discarded local symbol from outside the // comdat (__cuda_register_globals) is disallowed by the ELF spec. - // TODO: Reject __device__ constexpr and __device__ inline in Sema. + // // HIP managed variables need to be always recorded in device and host // compilations for transformation. + // + // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are + // added to llvm.compiler-used, therefore they are safe to be registered. if ((!D->hasExternalStorage() && !D->isInline()) || + CGM.getContext().CUDADeviceVarODRUsedByHost.contains(D) || D->hasAttr()) { registerDeviceVar(D, GV, !D->hasDefinition(), D->hasAttr()); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index c61da1c..e3c843c 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2362,8 +2362,8 @@ void CodeGenModule::EmitDeferred() { } // Emit CUDA/HIP static device variables referenced by host code only. - if (getLangOpts().CUDA) - for (auto V : getContext().CUDAStaticDeviceVarReferencedByHost) + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) + for (const auto *V : getContext().CUDADeviceVarODRUsedByHost) DeferredDeclsToEmit.push_back(V); // Stop if we're out of both deferred vtables and deferred declarations. diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 1390c17..719161f 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -17136,10 +17136,7 @@ MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef, CaptureType, DeclRefType, FunctionScopeIndexToStopAt); - // Diagnose ODR-use of host global variables in device functions. Reference - // of device global variables in host functions is allowed through shadow - // variables therefore it is not diagnosed. - if (SemaRef.LangOpts.CUDA && SemaRef.LangOpts.CUDAIsDevice) { + if (SemaRef.LangOpts.CUDA) { auto *FD = dyn_cast_or_null(SemaRef.CurContext); auto Target = SemaRef.IdentifyCUDATarget(FD); auto IsEmittedOnDeviceSide = [](VarDecl *Var) { @@ -17155,9 +17152,28 @@ MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef, } return false; }; - if (Var && Var->hasGlobalStorage() && !IsEmittedOnDeviceSide(Var)) { - SemaRef.targetDiag(Loc, diag::err_ref_bad_target) - << /*host*/ 2 << /*variable*/ 1 << Var << Target; + if (Var && Var->hasGlobalStorage()) { + if (!IsEmittedOnDeviceSide(Var)) { + // Diagnose ODR-use of host global variables in device functions. + // Reference of device global variables in host functions is allowed + // through shadow variables therefore it is not diagnosed. + if (SemaRef.LangOpts.CUDAIsDevice) + SemaRef.targetDiag(Loc, diag::err_ref_bad_target) + << /*host*/ 2 << /*variable*/ 1 << Var << Target; + } else if ((Target == Sema::CFT_Host || Target == Sema::CFT_HostDevice) && + !Var->hasExternalStorage()) { + // Record a CUDA/HIP device side variable if it is ODR-used + // by host code. This is done conservatively, when the variable is + // referenced in any of the following contexts: + // - a non-function context + // - a host function + // - a host device function + // This makes the ODR-use of the device side variable by host code to + // be visible in the device compilation for the compiler to be able to + // emit template variables instantiated by host code only and to + // externalize the static device side variable ODR-used by host code. + SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var); + } } } @@ -18323,24 +18339,6 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc, if (Var->isInvalidDecl()) return; - // Record a CUDA/HIP static device/constant variable if it is referenced - // by host code. This is done conservatively, when the variable is referenced - // in any of the following contexts: - // - a non-function context - // - a host function - // - a host device function - // This also requires the reference of the static device/constant variable by - // host code to be visible in the device compilation for the compiler to be - // able to externalize the static device/constant variable. - if (SemaRef.getASTContext().mayExternalizeStaticVar(Var)) { - auto *CurContext = SemaRef.CurContext; - if (!CurContext || !isa(CurContext) || - cast(CurContext)->hasAttr() || - (!cast(CurContext)->hasAttr() && - !cast(CurContext)->hasAttr())) - SemaRef.getASTContext().CUDAStaticDeviceVarReferencedByHost.insert(Var); - } - auto *MSI = Var->getMemberSpecializationInfo(); TemplateSpecializationKind TSK = MSI ? MSI->getTemplateSpecializationKind() : Var->getTemplateSpecializationKind(); diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index 9bac4e8..e2f32ad 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -107,9 +107,14 @@ __constant__ int ext_constant_var_def = 2; #if __cplusplus > 201402L // NORDC17: @inline_var = internal global i32 undef, comdat, align 4{{$}} // RDC17: @inline_var = linkonce_odr global i32 undef, comdat, align 4{{$}} +// NORDC17-NOT: @inline_var2 = +// RDC17-NOT: @inline_var2 = // NORDC17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, align 4{{$}} // RDC17: @_ZN1C17member_inline_varE = linkonce_odr constant i32 undef, comdat, align 4{{$}} +// Check inline variable ODR-used by host is emitted on host and registered. __device__ inline int inline_var = 3; +// Check inline variable not ODR-used by host is not emitted on host or registered. +__device__ inline int inline_var2 = 5; struct C { __device__ static constexpr int member_inline_var = 4; }; @@ -126,10 +131,17 @@ void use_pointers() { p = &ext_host_var; #if __cplusplus > 201402L p = &inline_var; + decltype(inline_var2) tmp; p = &C::member_inline_var; #endif } +__device__ void device_use() { +#if __cplusplus > 201402L + const int *p = &inline_var2; +#endif +} + // Make sure that all parts of GPU code init/cleanup are there: // * constant unnamed string with the device-side kernel name to be passed to // __hipRegisterFunction/__cudaRegisterFunction. @@ -212,7 +224,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0 -// LNX_17-NOT: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var +// LNX_17-DAG: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var +// LNX_17-NOT: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var2 // ALL: ret void // Test that we've built a constructor. diff --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu index fd501ed1..b12300b 100644 --- a/clang/test/CodeGenCUDA/host-used-device-var.cu +++ b/clang/test/CodeGenCUDA/host-used-device-var.cu @@ -1,47 +1,95 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ -// RUN: -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \ -// RUN: | FileCheck %s +// RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=DEV %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \ +// RUN: -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST %s + +// Negative tests. + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=DEV-NEG %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \ +// RUN: -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST-NEG %s #include "Inputs/cuda.h" // Check device variables used by neither host nor device functioins are not kept. -// CHECK-NOT: @v1 +// DEV-NEG-NOT: @v1 __device__ int v1; -// CHECK-NOT: @v2 +// DEV-NEG-NOT: @v2 __constant__ int v2; -// CHECK-NOT: @_ZL2v3 +// DEV-NEG-NOT: @_ZL2v3 static __device__ int v3; // Check device variables used by host functions are kept. -// CHECK-DAG: @u1 +// DEV-DAG: @u1 __device__ int u1; -// CHECK-DAG: @u2 +// DEV-DAG: @u2 __constant__ int u2; // Check host-used static device var is in llvm.compiler.used. -// CHECK-DAG: @_ZL2u3 +// DEV-DAG: @_ZL2u3 static __device__ int u3; // Check device-used static device var is emitted but is not in llvm.compiler.used. -// CHECK-DAG: @_ZL2u4 +// DEV-DAG: @_ZL2u4 static __device__ int u4; // Check device variables with used attribute are always kept. -// CHECK-DAG: @u5 +// DEV-DAG: @u5 __device__ __attribute__((used)) int u5; -int fun1() { - return u1 + u2 + u3; +// Test external device variable ODR-used by host code is not emitted or registered. +// DEV-NEG-NOT: @ext_var +extern __device__ int ext_var; + +// DEV-DAG: @inline_var = linkonce_odr addrspace(1) externally_initialized global i32 0 +__device__ inline int inline_var; + +template +using func_t = T (*) (T, T); + +template +__device__ T add_func (T x, T y) +{ + return x + y; +} + +// DEV-DAG: @_Z10p_add_funcIiE = linkonce_odr addrspace(1) externally_initialized global i32 (i32, i32)* @_Z8add_funcIiET_S0_S0_ +template +__device__ func_t p_add_func = add_func; + +void use(func_t p); +void use(int *p); + +void fun1() { + use(&u1); + use(&u2); + use(&u3); + use(&ext_var); + use(&inline_var); + use(p_add_func); } __global__ void kern1(int **x) { *x = &u4; } + // Check the exact list of variables to ensure @_ZL2u4 is not among them. -// CHECK: @llvm.compiler.used = {{[^@]*}} @_ZL2u3 {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5 +// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE {{[^@]*}} @_ZL2u3 {{[^@]*}} @inline_var {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5 + +// HOST-DAG: hipRegisterVar{{.*}}@u1 +// HOST-DAG: hipRegisterVar{{.*}}@u2 +// HOST-DAG: hipRegisterVar{{.*}}@_ZL2u3 +// HOST-DAG: hipRegisterVar{{.*}}@u5 +// HOST-DAG: hipRegisterVar{{.*}}@inline_var +// HOST-DAG: hipRegisterVar{{.*}}@_Z10p_add_funcIiE +// HOST-NEG-NOT: hipRegisterVar{{.*}}@ext_var +// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZL2u4 diff --git a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu index f2cd173..2cfed3a 100644 --- a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -2,12 +2,18 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ -// RUN: -emit-llvm -o - -x hip %s | FileCheck \ -// RUN: -check-prefixes=DEV %s +// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=DEV %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ -// RUN: -emit-llvm -o - -x hip %s | FileCheck \ -// RUN: -check-prefixes=HOST %s +// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=HOST %s + +// Negative tests. + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=DEV-NEG %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=HOST-NEG %s #include "Inputs/cuda.h" @@ -52,7 +58,7 @@ static __constant__ int y; // Test static host variable, which should not be externalized nor registered. // HOST-DAG: @_ZL1z = internal global i32 0 -// DEV-NOT: @_ZL1z +// DEV-NEG-NOT: @_ZL1z static int z; // Test implicit static constant variable, which should not be externalized. @@ -72,6 +78,12 @@ static constexpr int z2 = 456; static __device__ int w; +// Test non-ODR-use of static device var should not be emitted or registered. +// DEV-NEG-NOT: @_ZL1u +// HOST-NEG-NOT: @_ZL1u + +static __device__ int u; + inline __device__ void devfun(const int ** b) { const static int p = 2; b[0] = &p; @@ -88,6 +100,7 @@ __global__ void kernel(int *a, const int **b) { a[3] = x3; a[4] = x4; a[5] = x5; + a[6] = sizeof(u); b[0] = &w; b[1] = &z2; b[2] = &local_static_constant; @@ -108,10 +121,12 @@ void foo(const int **a) { getDeviceSymbol(&w); z = 123; a[0] = &z2; + decltype(u) tmp; } -// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] -// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] -// HOST: __hipRegisterVar({{.*}}@_ZL1w {{.*}}@[[DEVNAMEW]] -// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w -// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p +// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] +// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1w {{.*}}@[[DEVNAMEW]] +// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZL1u +// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w +// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu index eac985f..aa5b510 100644 --- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -2,19 +2,19 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=DEV,INT-DEV %s // RUN: %clang_cc1 -triple x86_64-gnu-linux \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=HOST,INT-HOST %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev // RUN: cat %t.dev | FileCheck -check-prefixes=DEV,EXT-DEV %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host // RUN: cat %t.host | FileCheck -check-prefixes=HOST,EXT-HOST %s // Check host and device compilations use the same postfixes for static @@ -22,6 +22,25 @@ // RUN: cat %t.dev %t.host | FileCheck -check-prefix=POSTFIX %s +// Negative tests. + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefix=DEV-NEG %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefix=HOST-NEG %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev +// RUN: cat %t.dev | FileCheck -check-prefix=DEV-NEG %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host +// RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s + + #include "Inputs/cuda.h" // Test function scope static device variable, which should not be externalized. @@ -61,9 +80,14 @@ static __constant__ int y; // Test static host variable, which should not be externalized nor registered. // HOST-DAG: @_ZL1z = internal global i32 0 -// DEV-NOT: @_ZL1z +// DEV-NEG-NOT: @_ZL1z static int z; +// Test non-ODR-use of static device variable is not emitted or registered. +// DEV-NEG-NOT: @_ZL1u +// HOST-NEG-NOT: @_ZL1u +static __device__ int u; + // Test static device variable in inline function, which should not be // externalized nor registered. // DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat @@ -77,6 +101,7 @@ __global__ void kernel(int *a, const int **b) { const static int w = 1; a[0] = x; a[1] = y; + a[2] = sizeof(u); b[0] = &w; b[1] = &x2; devfun(b); @@ -88,10 +113,12 @@ void foo() { getDeviceSymbol(&x); getDeviceSymbol(&y); z = 123; + decltype(u) tmp; } -// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] -// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] -// HOST-NOT: __hipRegisterVar({{.*}}@_ZL2x2 -// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w -// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p +// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] +// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZL2x2 +// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w +// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p +// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZL1u