OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
return;
VD = cast<ValueDecl>(VD->getCanonicalDecl());
+ // Use user-specified allocation.
+ if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>())
+ return;
// Variables captured by value must be globalized.
if (auto *CSI = CGF.CapturedStmtInfo) {
if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
- bool UseDefaultAllocator = true;
if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
switch (A->getAllocatorType()) {
// threadlocal.
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
- // Just pass-through to check if the globalization is required.
- break;
- case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
- case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
- case OMPAllocateDeclAttr::OMPConstMemAlloc:
- case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+ // Follow the user decision - use default allocation.
+ return Address::invalid();
case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
- UseDefaultAllocator = false;
- break;
+ // TODO: implement aupport for user-defined allocators.
+ return Address::invalid();
+ case OMPAllocateDeclAttr::OMPConstMemAlloc: {
+ llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
+ auto *GV = new llvm::GlobalVariable(
+ CGM.getModule(), VarTy, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::Constant::getNullValue(VarTy), VD->getName(),
+ /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant));
+ CharUnits Align = CGM.getContext().getDeclAlign(VD);
+ GV->setAlignment(Align.getQuantity());
+ return Address(GV, Align);
+ }
+ case OMPAllocateDeclAttr::OMPPTeamMemAlloc: {
+ llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
+ auto *GV = new llvm::GlobalVariable(
+ CGM.getModule(), VarTy, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::Constant::getNullValue(VarTy), VD->getName(),
+ /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+ CharUnits Align = CGM.getContext().getDeclAlign(VD);
+ GV->setAlignment(Align.getQuantity());
+ return Address(GV, Align);
+ }
+ case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+ case OMPAllocateDeclAttr::OMPCGroupMemAlloc: {
+ llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
+ auto *GV = new llvm::GlobalVariable(
+ CGM.getModule(), VarTy, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::Constant::getNullValue(VarTy), VD->getName());
+ CharUnits Align = CGM.getContext().getDeclAlign(VD);
+ GV->setAlignment(Align.getQuantity());
+ return Address(GV, Align);
+ }
}
}
}
}
- // TODO: replace it with return
- // UseDefaultAllocator ? Address::invalid :
- // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp
- // supports __kmpc_alloc|__kmpc_free.
- (void)UseDefaultAllocator; // Prevent a warning.
return Address::invalid();
}
// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0,
// CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0,
// CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32,
+// CHECK-DAG: @bar_c = internal global i32 0,
+// CHECK-DAG: @bar_b = internal addrspace(3) global double 0.000000e+00,
struct St{
int a;
};
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
a=2;
double b = 3;
+ float c;
#pragma omp allocate(b) allocator(omp_default_mem_alloc)
+#pragma omp allocate(c) allocator(omp_cgroup_mem_alloc)
return (foo<int>());
}
// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
-// CHECK: alloca i32,
+// CHECK-NOT: alloca i32,
extern template int ST<int>::m;
+
+void baz(float &);
+
+// CHECK: define void @{{.+}}bar{{.+}}()
+void bar() {
+ // CHECK: alloca float,
+ float bar_a;
+ // CHECK: alloca double,
+ double bar_b;
+ int bar_c;
+#pragma omp allocate(bar_c) allocator(omp_cgroup_mem_alloc)
+ // CHECK: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}})
+#pragma omp parallel private(bar_a, bar_b) allocate(omp_thread_mem_alloc \
+ : bar_a) allocate(omp_pteam_mem_alloc \
+ : bar_b)
+ {
+ bar_b = bar_a;
+ baz(bar_a);
+ }
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
+// CHECK-NOT: alloca double,
+// CHECK: alloca float,
+// CHECK-NOT: alloca double,
+// CHECK: load float, float* %
+// CHECK: store double {{.+}}, double addrspace(3)* @bar_b,
+}
+
#pragma omp end declare target
#endif