From 3f6469b4c632b416d1365c32ddf852e833abc910 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 3 Nov 2014 16:51:53 +0000 Subject: [PATCH] Emit OpenCL local global variables without zeroinitializer Local variables are not initialized, and every target has been (incorrectly) ignoring the unnecessary request for zero initialization. llvm-svn: 221162 --- clang/lib/CodeGen/CGDecl.cpp | 10 ++++++++- .../test/CodeGenOpenCL/local-initializer-undef.cl | 24 ++++++++++++++++++++++ clang/test/CodeGenOpenCL/local.cl | 6 ++++-- 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/local-initializer-undef.cl diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 81ca2e6..959ac9a 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -189,10 +189,18 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( llvm::Type *LTy = getTypes().ConvertTypeForMem(Ty); unsigned AddrSpace = GetGlobalVarAddressSpace(&D, getContext().getTargetAddressSpace(Ty)); + + // Local address space cannot have an initializer. 
+ llvm::Constant *Init = nullptr; + if (Ty.getAddressSpace() != LangAS::opencl_local) + Init = EmitNullConstant(Ty); + else + Init = llvm::UndefValue::get(LTy); + llvm::GlobalVariable *GV = new llvm::GlobalVariable(getModule(), LTy, Ty.isConstant(getContext()), Linkage, - EmitNullConstant(D.getType()), Name, nullptr, + Init, Name, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); GV->setAlignment(getContext().getDeclAlign(&D).getQuantity()); diff --git a/clang/test/CodeGenOpenCL/local-initializer-undef.cl b/clang/test/CodeGenOpenCL/local-initializer-undef.cl new file mode 100644 index 0000000..5d34f56 --- /dev/null +++ b/clang/test/CodeGenOpenCL/local-initializer-undef.cl @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s + +typedef struct Foo { + int x; + float y; + float z; +} Foo; + +// CHECK-DAG: @test.lds_int = internal addrspace(2) global i32 undef +// CHECK-DAG: @test.lds_int_arr = internal addrspace(2) global [128 x i32] undef +// CHECK-DAG: @test.lds_struct = internal addrspace(2) global %struct.Foo undef +// CHECK-DAG: @test.lds_struct_arr = internal addrspace(2) global [64 x %struct.Foo] undef +__kernel void test() +{ + __local int lds_int; + __local int lds_int_arr[128]; + __local Foo lds_struct; + __local Foo lds_struct_arr[64]; + + lds_int = 1; + lds_int_arr[0] = 1; + lds_struct.x = 1; + lds_struct_arr[0].x = 1; +} diff --git a/clang/test/CodeGenOpenCL/local.cl b/clang/test/CodeGenOpenCL/local.cl index 895c8fa..f1031cd 100644 --- a/clang/test/CodeGenOpenCL/local.cl +++ b/clang/test/CodeGenOpenCL/local.cl @@ -1,9 +1,11 @@ // RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck %s +void func(local int*); + __kernel void foo(void) { - // CHECK: @foo.i = internal unnamed_addr addrspace(2) + // CHECK: @foo.i = internal addrspace(2) global i32 undef __local int i; - ++i; + func(&i); } // CHECK-LABEL: define void 
@_Z3barPU7CLlocali -- 2.7.4