From 63cc8e96c331b536eb59bc543b10cc4036e1c2a8 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 20 Mar 2018 14:45:59 +0000 Subject: [PATCH] [OPENMP, NVPTX] Globalization of the private redeclarations. If the generic codegen is enabled and private copy of the original variable escapes the declaration context, this private copy should be globalized just like it was the original variable. llvm-svn: 327985 --- clang/include/clang/Basic/Attr.td | 8 ++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 218 +++++++++++++++++++---------- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 4 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 26 ++-- clang/lib/Sema/SemaOpenMP.cpp | 54 ++++--- clang/test/OpenMP/nvptx_data_sharing.cpp | 13 +- 6 files changed, 215 insertions(+), 108 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 14fa0ee..ffd60c3 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2779,6 +2779,14 @@ def OMPCaptureKind : Attr { let Documentation = [Undocumented]; } +def OMPReferencedVar : Attr { + // This attribute has no spellings as it is only ever created implicitly. + let Spellings = []; + let SemaHandler = 0; + let Args = [ExprArgument<"Ref">]; + let Documentation = [Undocumented]; +} + def OMPDeclareSimdDecl : Attr { let Spellings = [Pragma<"omp", "declare simd">]; let Subjects = SubjectList<[Function]>; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index b4c943d..ccffa9c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -171,36 +171,48 @@ class CheckVarsEscapingDeclContext final : public ConstStmtVisitor { CodeGenFunction &CGF; llvm::SetVector EscapedDecls; + llvm::SetVector EscapedVariableLengthDecls; llvm::SmallPtrSet EscapedParameters; - llvm::SmallPtrSet IgnoredDecls; bool AllEscaped = false; RecordDecl *GlobalizedRD = nullptr; llvm::SmallDenseMap MappedDeclsFields; void markAsEscaped(const ValueDecl *VD) { - if (IgnoredDecls.count(VD)) - return; + VD = cast(VD->getCanonicalDecl()); // Variables captured by value must be globalized. if (auto *CSI = CGF.CapturedStmtInfo) { if (const FieldDecl *FD = CSI->lookup(cast(VD))) { + if (!FD->hasAttrs()) + return; + const auto *Attr = FD->getAttr(); + if (!Attr) + return; + if (!isOpenMPPrivate( + static_cast(Attr->getCaptureKind())) || + Attr->getCaptureKind() == OMPC_map) + return; if (FD->getType()->isReferenceType()) return; + assert(!VD->getType()->isVariablyModifiedType() && + "Parameter captured by value with variably modified type"); EscapedParameters.insert(VD); } } - EscapedDecls.insert(VD); + if (VD->getType()->isVariablyModifiedType()) + EscapedVariableLengthDecls.insert(VD); + else + EscapedDecls.insert(VD); } void VisitValueDecl(const ValueDecl *VD) { - if (VD->getType()->isLValueReferenceType()) { + if (VD->getType()->isLValueReferenceType()) markAsEscaped(VD); - if (const auto *VarD = dyn_cast(VD)) { - if (!isa(VarD) && VarD->hasInit()) { - const bool SavedAllEscaped = AllEscaped; - AllEscaped = true; - Visit(VarD->getInit()); - AllEscaped = SavedAllEscaped; - } + if (const auto *VarD = dyn_cast(VD)) { + if (!isa(VarD) && VarD->hasInit()) { + const bool SavedAllEscaped = AllEscaped; + AllEscaped = VD->getType()->isLValueReferenceType(); + Visit(VarD->getInit()); + AllEscaped = SavedAllEscaped; } } } @@ -265,9 +277,7 @@ class CheckVarsEscapingDeclContext final } public: - CheckVarsEscapingDeclContext(CodeGenFunction &CGF, - ArrayRef IgnoredDecls) - : CGF(CGF), IgnoredDecls(IgnoredDecls.begin(), IgnoredDecls.end()) {} + CheckVarsEscapingDeclContext(CodeGenFunction &CGF) : CGF(CGF) {} virtual ~CheckVarsEscapingDeclContext() = default; void VisitDeclStmt(const DeclStmt *S) { if (!S) @@ -420,6 +430,12 @@ public: const llvm::SmallPtrSetImpl &getEscapedParameters() const { return EscapedParameters; } + + /// Returns the list of the escaped variables with the variably modified + /// types. + ArrayRef getEscapedVariableLengthDecls() const { + return EscapedVariableLengthDecls.getArrayRef(); + } }; } // anonymous namespace @@ -1247,63 +1263,103 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); if (I == FunctionGlobalizedDecls.end()) return; - const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord; - QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord); - - // Recover pointer to this function's global record. The runtime will - // handle the specifics of the allocation of the memory. - // Use actual memory size of the record including the padding - // for alignment purposes. - unsigned Alignment = - CGM.getContext().getTypeAlignInChars(RecTy).getQuantity(); - unsigned GlobalRecordSize = - CGM.getContext().getTypeSizeInChars(RecTy).getQuantity(); - GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); - // TODO: allow the usage of shared memory to be controlled by - // the user, for now, default to global. - llvm::Value *GlobalRecordSizeArg[] = { - llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), - CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; - llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack), - GlobalRecordSizeArg); - llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( - GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo()); - LValue Base = CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy); - I->getSecond().GlobalRecordAddr = GlobalRecValue; - - // Emit the "global alloca" which is a GEP from the global declaration record - // using the pointer returned by the runtime. - for (auto &Rec : I->getSecond().LocalVarData) { - bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first); - llvm::Value *ParValue; - if (EscapedParam) { - const auto *VD = cast(Rec.first); - LValue ParLVal = - CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); - ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc); - } - const FieldDecl *FD = Rec.second.first; - LValue VarAddr = CGF.EmitLValueForField(Base, FD); - Rec.second.second = VarAddr.getAddress(); - if (EscapedParam) { - const auto *VD = cast(Rec.first); - CGF.EmitStoreOfScalar(ParValue, VarAddr); - I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress()); + if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) { + QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord); + + // Recover pointer to this function's global record. The runtime will + // handle the specifics of the allocation of the memory. + // Use actual memory size of the record including the padding + // for alignment purposes. + unsigned Alignment = + CGM.getContext().getTypeAlignInChars(RecTy).getQuantity(); + unsigned GlobalRecordSize = + CGM.getContext().getTypeSizeInChars(RecTy).getQuantity(); + GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); + // TODO: allow the usage of shared memory to be controlled by + // the user, for now, default to global. + llvm::Value *GlobalRecordSizeArg[] = { + llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), + CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; + llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack), + GlobalRecordSizeArg); + llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( + GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo()); + LValue Base = + CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy); + I->getSecond().GlobalRecordAddr = GlobalRecValue; + + // Emit the "global alloca" which is a GEP from the global declaration + // record using the pointer returned by the runtime. + for (auto &Rec : I->getSecond().LocalVarData) { + bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first); + llvm::Value *ParValue; + if (EscapedParam) { + const auto *VD = cast(Rec.first); + LValue ParLVal = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); + ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc); + } + const FieldDecl *FD = Rec.second.first; + LValue VarAddr = CGF.EmitLValueForField(Base, FD); + Rec.second.second = VarAddr.getAddress(); + if (EscapedParam) { + const auto *VD = cast(Rec.first); + CGF.EmitStoreOfScalar(ParValue, VarAddr); + I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress()); + } } } + for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) { + // Recover pointer to this function's global record. The runtime will + // handle the specifics of the allocation of the memory. + // Use actual memory size of the record including the padding + // for alignment purposes. + auto &Bld = CGF.Builder; + llvm::Value *Size = CGF.getTypeSize(VD->getType()); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + Size = Bld.CreateNUWAdd( + Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1)); + llvm::Value *AlignVal = + llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity()); + Size = Bld.CreateUDiv(Size, AlignVal); + Size = Bld.CreateNUWMul(Size, AlignVal); + // TODO: allow the usage of shared memory to be controlled by + // the user, for now, default to global. + llvm::Value *GlobalRecordSizeArg[] = { + Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; + llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack), + GlobalRecordSizeArg); + llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( + GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); + LValue Base = CGF.MakeAddrLValue(GlobalRecCastAddr, VD->getType(), + CGM.getContext().getDeclAlign(VD), + AlignmentSource::Decl); + I->getSecond().MappedParams->setVarAddr(CGF, cast(VD), + Base.getAddress()); + I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue); + } I->getSecond().MappedParams->apply(CGF); } void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) { const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); - if (I != FunctionGlobalizedDecls.end() && I->getSecond().GlobalRecordAddr) { + if (I != FunctionGlobalizedDecls.end()) { I->getSecond().MappedParams->restore(CGF); if (!CGF.HaveInsertPoint()) return; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), - I->getSecond().GlobalRecordAddr); + for (llvm::Value *Addr : + llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + Addr); + } + if (I->getSecond().GlobalRecordAddr) { + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + I->getSecond().GlobalRecordAddr); + } } } @@ -2937,7 +2993,6 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, assert(D && "Expected function or captured|block decl."); assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 && "Function is registered already."); - SmallVector IgnoredDecls; const Stmt *Body = nullptr; bool NeedToDelayGlobalization = false; if (const auto *FD = dyn_cast(D)) { @@ -2946,22 +3001,16 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, Body = BD->getBody(); } else if (const auto *CD = dyn_cast(D)) { Body = CD->getBody(); - if (CGF.CapturedStmtInfo->getKind() == CR_OpenMP) { - NeedToDelayGlobalization = true; - if (const auto *CS = dyn_cast(Body)) { - IgnoredDecls.reserve(CS->capture_size()); - for (const auto &Capture : CS->captures()) - if (Capture.capturesVariable()) - IgnoredDecls.emplace_back(Capture.getCapturedVar()); - } - } + NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP; } if (!Body) return; - CheckVarsEscapingDeclContext VarChecker(CGF, IgnoredDecls); + CheckVarsEscapingDeclContext VarChecker(CGF); VarChecker.Visit(Body); const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord(); - if (!GlobalizedVarsRecord) + ArrayRef EscapedVariableLengthDecls = + VarChecker.getEscapedVariableLengthDecls(); + if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty()) return; auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; I->getSecond().MappedParams = @@ -2970,8 +3019,11 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, I->getSecond().EscapedParameters.insert( VarChecker.getEscapedParameters().begin(), VarChecker.getEscapedParameters().end()); + I->getSecond().EscapedVariableLengthDecls.append( + EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end()); DeclToAddrMapTy &Data = I->getSecond().LocalVarData; for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { + assert(VD->isCanonicalDecl() && "Expected canonical declaration"); const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid()))); } @@ -2991,13 +3043,25 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { + VD = VD->getCanonicalDecl(); auto I = FunctionGlobalizedDecls.find(CGF.CurFn); if (I == FunctionGlobalizedDecls.end()) return Address::invalid(); auto VDI = I->getSecond().LocalVarData.find(VD); - if (VDI == I->getSecond().LocalVarData.end()) - return Address::invalid(); - return VDI->second.second; + if (VDI != I->getSecond().LocalVarData.end()) + return VDI->second.second; + if (VD->hasAttrs()) { + for (specific_attr_iterator IT(VD->attr_begin()), + E(VD->attr_end()); + IT != E; ++IT) { + auto VDI = I->getSecond().LocalVarData.find( + cast(cast(IT->getRef())->getDecl()) + ->getCanonicalDecl()); + if (VDI != I->getSecond().LocalVarData.end()) + return VDI->second.second; + } + } + return Address::invalid(); } void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 7b1944d..feb55c5 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -342,9 +342,11 @@ private: using EscapedParamsTy = llvm::SmallPtrSet; struct FunctionData { DeclToAddrMapTy LocalVarData; + EscapedParamsTy EscapedParameters; + llvm::SmallVector EscapedVariableLengthDecls; + llvm::SmallVector EscapedVariableLengthDeclsAddrs; const RecordDecl *GlobalRecord = nullptr; llvm::Value *GlobalRecordAddr = nullptr; - EscapedParamsTy EscapedParameters; std::unique_ptr MappedParams; }; /// Maps the function to the list of the globalized variables with their diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 02bb8ae..1246e5a 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1263,6 +1263,7 @@ static void emitEmptyBoundParameters(CodeGenFunction &, void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // Emit parallel region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); bool Copyins = CGF.EmitOMPCopyinClause(S); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); @@ -1277,7 +1278,6 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; @@ -3981,12 +3981,12 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); } @@ -4039,12 +4039,12 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { // Emit teams region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; @@ -4059,12 +4059,12 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, Action.Enter(CGF); // Emit teams region as a standalone region. auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; @@ -4106,10 +4106,10 @@ emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, CodeGenDistribute); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); @@ -4152,10 +4152,10 @@ static void emitTargetTeamsDistributeSimdRegion( // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, CodeGenDistribute); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); @@ -4197,10 +4197,10 @@ void CodeGenFunction::EmitOMPTeamsDistributeDirective( // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, CodeGenDistribute); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); @@ -4219,10 +4219,10 @@ void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, CodeGenDistribute); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); @@ -4242,10 +4242,10 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, CodeGenDistribute); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); @@ -4265,10 +4265,10 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( // Emit teams region as a standalone region. auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.CGM.getOpenMPRuntime().emitInlinedDirective( CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); @@ -4290,10 +4290,10 @@ static void emitTargetTeamsDistributeParallelForRegion( // Emit teams region as a standalone region. auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.CGM.getOpenMPRuntime().emitInlinedDirective( CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); @@ -4342,10 +4342,10 @@ static void emitTargetTeamsDistributeParallelForSimdRegion( // Emit teams region as a standalone region. auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); CGF.CGM.getOpenMPRuntime().emitInlinedDirective( CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); @@ -4614,12 +4614,12 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF, auto *CS = S.getCapturedStmt(OMPD_parallel); Action.Enter(CGF); auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); - Action.Enter(CGF); // TODO: Add support for clauses. CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 7fdd08d..76b7f3a 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -803,7 +803,8 @@ void DSAStackTy::addDSA(ValueDecl *D, Expr *E, OpenMPClauseKind A, /// \brief Build a variable declaration for OpenMP loop iteration variable. static VarDecl *buildVarDecl(Sema &SemaRef, SourceLocation Loc, QualType Type, - StringRef Name, const AttrVec *Attrs = nullptr) { + StringRef Name, const AttrVec *Attrs = nullptr, + DeclRefExpr *OrigRef = nullptr) { DeclContext *DC = SemaRef.CurContext; IdentifierInfo *II = &SemaRef.PP.getIdentifierTable().get(Name); TypeSourceInfo *TInfo = SemaRef.Context.getTrivialTypeSourceInfo(Type, Loc); @@ -815,6 +816,10 @@ static VarDecl *buildVarDecl(Sema &SemaRef, SourceLocation Loc, QualType Type, Decl->addAttr(*I); } Decl->setImplicit(); + if (OrigRef) { + Decl->addAttr( + OMPReferencedVarAttr::CreateImplicit(SemaRef.Context, OrigRef)); + } return Decl; } @@ -1462,7 +1467,11 @@ void Sema::setOpenMPCaptureKind(FieldDecl *FD, ValueDecl *D, unsigned Level) { } if (DSAStack->hasExplicitDirective(isOpenMPTargetExecutionDirective, NewLevel)) { - OMPC = OMPC_firstprivate; + OMPC = OMPC_map; + if (D->getType()->isScalarType() && + DSAStack->getDefaultDMAAtLevel(NewLevel) != + DefaultMapAttributes::DMA_tofrom_scalar) + OMPC = OMPC_firstprivate; break; } } @@ -1525,7 +1534,7 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) { // region uses original variable for proper diagnostics. auto *VDPrivate = buildVarDecl( *this, DE->getExprLoc(), Type.getUnqualifiedType(), - VD->getName(), VD->hasAttrs() ? &VD->getAttrs() : nullptr); + VD->getName(), VD->hasAttrs() ? &VD->getAttrs() : nullptr, DRE); ActOnUninitializedDecl(VDPrivate); if (VDPrivate->isInvalidDecl()) continue; @@ -4206,9 +4215,12 @@ DeclRefExpr *OpenMPIterationSpaceChecker::BuildCounterVar( Expr *OpenMPIterationSpaceChecker::BuildPrivateCounterVar() const { if (LCDecl && !LCDecl->isInvalidDecl()) { auto Type = LCDecl->getType().getNonReferenceType(); - auto *PrivateVar = - buildVarDecl(SemaRef, DefaultLoc, Type, LCDecl->getName(), - LCDecl->hasAttrs() ? &LCDecl->getAttrs() : nullptr); + auto *PrivateVar = buildVarDecl( + SemaRef, DefaultLoc, Type, LCDecl->getName(), + LCDecl->hasAttrs() ? &LCDecl->getAttrs() : nullptr, + isa(LCDecl) + ? buildDeclRefExpr(SemaRef, cast(LCDecl), Type, DefaultLoc) + : nullptr); if (PrivateVar->isInvalidDecl()) return nullptr; return buildDeclRefExpr(SemaRef, PrivateVar, Type, DefaultLoc); @@ -9322,8 +9334,10 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, // IdResolver, so the code in the OpenMP region uses original variable for // proper diagnostics. Type = Type.getUnqualifiedType(); - auto VDPrivate = buildVarDecl(*this, ELoc, Type, D->getName(), - D->hasAttrs() ? &D->getAttrs() : nullptr); + auto VDPrivate = + buildVarDecl(*this, ELoc, Type, D->getName(), + D->hasAttrs() ? &D->getAttrs() : nullptr, + VD ? cast(SimpleRefExpr) : nullptr); ActOnUninitializedDecl(VDPrivate); if (VDPrivate->isInvalidDecl()) continue; @@ -9561,8 +9575,10 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, } Type = Type.getUnqualifiedType(); - auto VDPrivate = buildVarDecl(*this, ELoc, Type, D->getName(), - D->hasAttrs() ? &D->getAttrs() : nullptr); + auto VDPrivate = + buildVarDecl(*this, ELoc, Type, D->getName(), + D->hasAttrs() ? &D->getAttrs() : nullptr, + VD ? cast(SimpleRefExpr) : nullptr); // Generate helper private variable and initialize it with the value of the // original variable. The address of the original variable is replaced by // the address of the new private variable in the CodeGen. This new variable @@ -10454,8 +10470,10 @@ static bool ActOnOMPReductionKindClause( Context.getAsArrayType(D->getType().getNonReferenceType())) PrivateTy = D->getType().getNonReferenceType(); // Private copy. - auto *PrivateVD = buildVarDecl(S, ELoc, PrivateTy, D->getName(), - D->hasAttrs() ? &D->getAttrs() : nullptr); + auto *PrivateVD = + buildVarDecl(S, ELoc, PrivateTy, D->getName(), + D->hasAttrs() ? &D->getAttrs() : nullptr, + VD ? cast(SimpleRefExpr) : nullptr); // Add initializer for private variable. Expr *Init = nullptr; auto *LHSDRE = buildDeclRefExpr(S, LHSVD, Type, ELoc); @@ -10911,8 +10929,10 @@ OMPClause *Sema::ActOnOpenMPLinearClause( Type = Type.getNonReferenceType().getUnqualifiedType().getCanonicalType(); // Build private copy of original var. - auto *Private = buildVarDecl(*this, ELoc, Type, D->getName(), - D->hasAttrs() ? &D->getAttrs() : nullptr); + auto *Private = + buildVarDecl(*this, ELoc, Type, D->getName(), + D->hasAttrs() ? &D->getAttrs() : nullptr, + VD ? cast(SimpleRefExpr) : nullptr); auto *PrivateRef = buildDeclRefExpr(*this, Private, Type, ELoc); // Build var to save initial value. VarDecl *Init = buildVarDecl(*this, ELoc, Type, ".linear.start"); @@ -13072,8 +13092,10 @@ OMPClause *Sema::ActOnOpenMPUseDevicePtrClause(ArrayRef VarList, } // Build the private variable and the expression that refers to it. - auto VDPrivate = buildVarDecl(*this, ELoc, Type, D->getName(), - D->hasAttrs() ? &D->getAttrs() : nullptr); + auto VDPrivate = + buildVarDecl(*this, ELoc, Type, D->getName(), + D->hasAttrs() ? &D->getAttrs() : nullptr, + VD ? cast(SimpleRefExpr) : nullptr); if (VDPrivate->isInvalidDecl()) continue; diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp index 5e9ca2d..26ab3c0 100644 --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -18,8 +18,10 @@ void test_ds(){ a = 1000; } int b = 100; - #pragma omp parallel + int c = 1000; + #pragma omp parallel private(c) { + int *c1 = &c; b = a + 10000; } } @@ -73,6 +75,15 @@ void test_ds(){ // CK1: [[SHARGSTMP16:%.+]] = load i32*, i32** [[SHARGSTMP15]] // CK1: call void @__omp_outlined__{{.*}}({{.*}}, i32* [[SHARGSTMP16]]) +/// outlined function for the second parallel region /// + +// CK1: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.+}}, i32* dereferenceable{{.+}}) +// CK1: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0) +// CK1: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_TY:%.+]]* +// CK1: [[C_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i32 0, i32 0 +// CK1: store i32* [[C_ADDR]], i32** % +// CK1: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]]) + /// ========= In the data sharing wrapper function ========= /// // CK1: {{.*}}define internal void @__omp_outlined{{.*}}wrapper({{.*}}) -- 2.7.4