static const unsigned PTXAddrSpaceMap[] = {
0, // opencl_global
4, // opencl_local
- 1 // opencl_constant
+ 1, // opencl_constant
+ 0, // cuda_device
+ 1, // cuda_constant
+ 4, // cuda_shared
};
class PTXTargetInfo : public TargetInfo {
static const char * const GCCRegNames[];
static const unsigned TCEOpenCLAddrSpaceMap[] = {
3, // opencl_global
4, // opencl_local
- 5 // opencl_constant
+ 5, // opencl_constant
+ 0, // cuda_device
+ 0, // cuda_constant
+ 0 // cuda_shared
};
class TCETargetInfo : public TargetInfo{
DeferredDecls.erase(DDI);
}
+ unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace());
llvm::GlobalVariable *GV =
new llvm::GlobalVariable(getModule(), Ty->getElementType(), false,
llvm::GlobalValue::ExternalLinkage,
0, MangledName, 0,
- false, Ty->getAddressSpace());
+ false, AddrSpace);
// Handle things which are present even on external declarations.
if (D) {
GV->setThreadLocal(D->isThreadSpecified());
}
- return GV;
+ if (AddrSpace != Ty->getAddressSpace())
+ return llvm::ConstantExpr::getBitCast(GV, Ty);
+ else
+ return GV;
}
return llvmInit;
}
+unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D,
+ unsigned AddrSpace) {
+ if (LangOpts.CUDA && CodeGenOpts.CUDAIsDevice) {
+ if (D->hasAttr<CUDAConstantAttr>())
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant);
+ else if (D->hasAttr<CUDASharedAttr>())
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared);
+ else
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device);
+ }
+
+ return AddrSpace;
+}
+
void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
llvm::Constant *Init = 0;
QualType ASTTy = D->getType();
if (GV == 0 ||
GV->getType()->getElementType() != InitType ||
GV->getType()->getAddressSpace() !=
- getContext().getTargetAddressSpace(ASTTy)) {
+ GetGlobalVarAddressSpace(D, getContext().getTargetAddressSpace(ASTTy))) {
// Move the old entry aside so that we'll create a new one.
Entry->setName(StringRef());
CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty,
llvm::GlobalValue::LinkageTypes Linkage);
+ /// GetGlobalVarAddressSpace - Return the address space of the underlying
+ /// global variable for D, as determined by its declaration. Normally this
+ /// is the same as the address space of D's type, but in CUDA, address spaces
+ /// are associated with declarations, not types.
+ unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace);
+
/// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the
/// given global variable. If Ty is non-null and if the global doesn't exist,
/// then it will be greated with the specified type instead of whatever the
--- /dev/null
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple ptx32-unknown-unknown | FileCheck %s
+
+#include "../SemaCUDA/cuda.h"
+
+// CHECK: @i = global
+__device__ int i;
+
+// CHECK: @j = addrspace(1) global
+__constant__ int j;
+
+// CHECK: @k = addrspace(4) global
+__shared__ int k;
+
+__device__ void foo() {
+ // CHECK: load i32* @i
+ i++;
+
+ // CHECK: load i32* bitcast (i32 addrspace(1)* @j to i32*)
+ j++;
+
+ // CHECK: load i32* bitcast (i32 addrspace(4)* @k to i32*)
+ k++;
+}
+