From: Alexey Bataev Date: Mon, 8 Apr 2019 16:53:57 +0000 (+0000) Subject: [OPENMP][NVPTX]Fixed processing of memory management directives. X-Git-Tag: llvmorg-10-init~8254 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1db9bfeba59b15bfb3f8592588e9aa9cd13ccd6b;p=platform%2Fupstream%2Fllvm.git [OPENMP][NVPTX]Fixed processing of memory management directives. Added special processing of the memory management directives/clauses for NVPTX target. For private locals, omp_default_mem_alloc and omp_thread_mem_alloc result in allocation in local memory. omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc allocates shared memory, and omp_cgroup_mem_alloc and omp_large_cap_mem_alloc allocate global memory. llvm-svn: 357923 --- diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 46b1b0f..632bca6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -318,6 +318,9 @@ class CheckVarsEscapingDeclContext final OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) return; VD = cast(VD->getCanonicalDecl()); + // Use user-specified allocation. + if (VD->hasAttrs() && VD->hasAttr()) + return; // Variables captured by value must be globalized. if (auto *CSI = CGF.CapturedStmtInfo) { if (const FieldDecl *FD = CSI->lookup(cast(VD))) { @@ -4725,7 +4728,6 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { - bool UseDefaultAllocator = true; if (VD && VD->hasAttr()) { const auto *A = VD->getAttr(); switch (A->getAllocatorType()) { @@ -4733,17 +4735,48 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, // threadlocal. case OMPAllocateDeclAttr::OMPDefaultMemAlloc: case OMPAllocateDeclAttr::OMPThreadMemAlloc: - // Just pass-through to check if the globalization is required. - break; - case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: - case OMPAllocateDeclAttr::OMPCGroupMemAlloc: case OMPAllocateDeclAttr::OMPHighBWMemAlloc: case OMPAllocateDeclAttr::OMPLowLatMemAlloc: - case OMPAllocateDeclAttr::OMPConstMemAlloc: - case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + // Follow the user decision - use default allocation. + return Address::invalid(); case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: - UseDefaultAllocator = false; - break; + // TODO: implement aupport for user-defined allocators. + return Address::invalid(); + case OMPAllocateDeclAttr::OMPConstMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName(), + /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant)); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName(), + /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName()); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } } } @@ -4769,11 +4802,6 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, } } - // TODO: replace it with return - // UseDefaultAllocator ? Address::invalid : - // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp - // supports __kmpc_alloc|__kmpc_free. - (void)UseDefaultAllocator; // Prevent a warning. return Address::invalid(); } diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp index ec1faff..9a285d0 100644 --- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -24,6 +24,8 @@ extern const omp_allocator_handle_t omp_thread_mem_alloc; // CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0, // CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0, // CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32, +// CHECK-DAG: @bar_c = internal global i32 0, +// CHECK-DAG: @bar_b = internal addrspace(3) global double 0.000000e+00, struct St{ int a; }; @@ -64,13 +66,42 @@ int main () { #pragma omp allocate(a) allocator(omp_thread_mem_alloc) a=2; double b = 3; + float c; #pragma omp allocate(b) allocator(omp_default_mem_alloc) +#pragma omp allocate(c) allocator(omp_cgroup_mem_alloc) return (foo()); } // CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}() -// CHECK: alloca i32, +// CHECK-NOT: alloca i32, extern template int ST::m; + +void baz(float &); + +// CHECK: define void @{{.+}}bar{{.+}}() +void bar() { + // CHECK: alloca float, + float bar_a; + // CHECK: alloca double, + double bar_b; + int bar_c; +#pragma omp allocate(bar_c) allocator(omp_cgroup_mem_alloc) + // CHECK: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}}) +#pragma omp parallel private(bar_a, bar_b) allocate(omp_thread_mem_alloc \ + : bar_a) allocate(omp_pteam_mem_alloc \ + : bar_b) + { + bar_b = bar_a; + baz(bar_a); + } +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) +// CHECK-NOT: alloca double, +// CHECK: alloca float, +// CHECK-NOT: alloca double, +// CHECK: load float, float* % +// CHECK: store double {{.+}}, double addrspace(3)* @bar_b, +} + #pragma omp end declare target #endif