From 6ab5bb115a3491e799dfe00da39136f02247c251 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 29 Oct 2018 15:01:58 +0000 Subject: [PATCH] [OPENMP] Do not capture private loop counters. If the loop counter is not declared in the context of the loop and it is private, such loop counters should not be captured in the outlined regions. llvm-svn: 345505 --- clang/include/clang/Parse/Parser.h | 2 + clang/include/clang/Sema/Sema.h | 4 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 11 ++- clang/lib/Sema/SemaOpenMP.cpp | 106 +++++++++++++++++---- clang/test/OpenMP/debug-info-openmp-array.cpp | 2 +- clang/test/OpenMP/parallel_for_codegen.cpp | 8 +- clang/test/OpenMP/taskloop_codegen.cpp | 2 +- .../test/OpenMP/taskloop_firstprivate_messages.cpp | 7 +- clang/test/OpenMP/taskloop_reduction_codegen.cpp | 2 +- .../OpenMP/taskloop_simd_firstprivate_messages.cpp | 3 + 10 files changed, 116 insertions(+), 31 deletions(-) diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 8df6300..a8a55ed 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2144,6 +2144,8 @@ private: // 'for-init-statement' part of a 'for' statement. /// Returns true for declaration, false for expression. bool isForInitDeclaration() { + if (getLangOpts().OpenMP) + Actions.startOpenMPLoop(); if (getLangOpts().CPlusPlus) return isCXXSimpleDeclaration(/*AllowForRangeDecl=*/true); return isDeclarationSpecifier(true); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 63ea45f..373e961 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -8686,6 +8686,10 @@ public: ExprResult getOpenMPCapturedExpr(VarDecl *Capture, ExprValueKind VK, ExprObjectKind OK, SourceLocation Loc); + /// If the current region is a loop-based region, mark the start of the loop + /// construct. + void startOpenMPLoop(); + /// Check if the specified variable is used in 'private' clause. /// \param Level Relative level of nested OpenMP construct for that the check /// is performed. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index f410c59..c893795 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4949,10 +4949,16 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( if (isOpenMPSimdDirective(D.getDirectiveKind())) { emitOMPSimdRegion(CGF, cast(D), Action); } else { + OMPPrivateScope LoopGlobals(CGF); if (const auto *LD = dyn_cast(&D)) { for (const Expr *E : LD->counters()) { - if (const auto *VD = dyn_cast( - cast(E)->getDecl())) { + const auto *VD = dyn_cast(cast(E)->getDecl()); + if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { + LValue GlobLVal = CGF.EmitLValue(E); + LoopGlobals.addPrivate( + VD, [&GlobLVal]() { return GlobLVal.getAddress(); }); + } + if (const auto *CED = dyn_cast(VD)) { // Emit only those that were not explicitly referenced in clauses. if (!CGF.LocalDeclMap.count(VD)) CGF.EmitVarDecl(*VD); @@ -4973,6 +4979,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( } } } + LoopGlobals.Privatize(); CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); } }; diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 92d1514..a6d1a11 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -138,9 +138,11 @@ private: /// 'ordered' clause, the second one is true if the regions has 'ordered' /// clause, false otherwise. llvm::Optional> OrderedRegion; + unsigned AssociatedLoops = 1; + const Decl *PossiblyLoopCounter = nullptr; bool NowaitRegion = false; bool CancelRegion = false; - unsigned AssociatedLoops = 1; + bool LoopStart = false; SourceLocation InnerTeamsRegionLoc; /// Reference to the taskgroup task_reduction reference expression. Expr *TaskgroupReductionRef = nullptr; @@ -208,6 +210,33 @@ public: Stack.back().first.pop_back(); } + /// Marks that we're started loop parsing. + void loopInit() { + assert(isOpenMPLoopDirective(getCurrentDirective()) && + "Expected loop-based directive."); + Stack.back().first.back().LoopStart = true; + } + /// Start capturing of the variables in the loop context. + void loopStart() { + assert(isOpenMPLoopDirective(getCurrentDirective()) && + "Expected loop-based directive."); + Stack.back().first.back().LoopStart = false; + } + /// true, if variables are captured, false otherwise. + bool isLoopStarted() const { + assert(isOpenMPLoopDirective(getCurrentDirective()) && + "Expected loop-based directive."); + return !Stack.back().first.back().LoopStart; + } + /// Marks (or clears) declaration as possibly loop counter. + void resetPossibleLoopCounter(const Decl *D = nullptr) { + Stack.back().first.back().PossiblyLoopCounter = + D ? D->getCanonicalDecl() : D; + } + /// Gets the possible loop counter decl. + const Decl *getPossiblyLoopCunter() const { + return Stack.back().first.back().PossiblyLoopCounter; + } /// Start new OpenMP region stack in new non-capturing function. void pushFunction() { const FunctionScopeInfo *CurFnScope = SemaRef.getCurFunction(); @@ -354,7 +383,7 @@ public: return OMPD_unknown; return std::next(Stack.back().first.rbegin())->Directive; } - + /// Add requires decl to internal vector void addRequiresDecl(OMPRequiresDecl *RD) { RequiresDecls.push_back(RD); @@ -1510,8 +1539,28 @@ void Sema::adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex, FunctionScopesIndex -= Regions.size(); } +void Sema::startOpenMPLoop() { + assert(LangOpts.OpenMP && "OpenMP must be enabled."); + if (isOpenMPLoopDirective(DSAStack->getCurrentDirective())) + DSAStack->loopInit(); +} + bool Sema::isOpenMPPrivateDecl(const ValueDecl *D, unsigned Level) const { assert(LangOpts.OpenMP && "OpenMP is not allowed"); + if (isOpenMPLoopDirective(DSAStack->getCurrentDirective())) { + if (DSAStack->getAssociatedLoops() > 0 && + !DSAStack->isLoopStarted()) { + DSAStack->resetPossibleLoopCounter(D); + DSAStack->loopStart(); + return true; + } + if ((DSAStack->getPossiblyLoopCunter() == D->getCanonicalDecl() || + DSAStack->isLoopControlVariable(D).first) && + !DSAStack->hasExplicitDSA( + D, [](OpenMPClauseKind K) { return K != OMPC_private; }, Level) && + !isOpenMPSimdDirective(DSAStack->getCurrentDirective())) + return true; + } return DSAStack->hasExplicitDSA( D, [](OpenMPClauseKind K) { return K == OMPC_private; }, Level) || (DSAStack->isClauseParsingMode() && @@ -2021,6 +2070,30 @@ class DSAAttrChecker final : public StmtVisitor { Sema::VarsWithInheritedDSAType VarsWithInheritedDSA; llvm::SmallDenseSet ImplicitDeclarations; + void VisitSubCaptures(OMPExecutableDirective *S) { + // Check implicitly captured variables. + if (!S->hasAssociatedStmt() || !S->getAssociatedStmt()) + return; + for (const CapturedStmt::Capture &Cap : + S->getInnermostCapturedStmt()->captures()) { + if (!Cap.capturesVariable()) + continue; + VarDecl *VD = Cap.getCapturedVar(); + // Do not try to map the variable if it or its sub-component was mapped + // already. + if (isOpenMPTargetExecutionDirective(Stack->getCurrentDirective()) && + Stack->checkMappableExprComponentListsForDecl( + VD, /*CurrentRegionOnly=*/true, + [](OMPClauseMappableExprCommon::MappableExprComponentListRef, + OpenMPClauseKind) { return true; })) + continue; + DeclRefExpr *DRE = buildDeclRefExpr( + SemaRef, VD, VD->getType().getNonLValueExprType(SemaRef.Context), + Cap.getLocation(), /*RefersToCapture=*/true); + Visit(DRE); + } + } + public: void VisitDeclRefExpr(DeclRefExpr *E) { if (E->isTypeDependent() || E->isValueDependent() || @@ -2253,25 +2326,9 @@ public: for (Stmt *C : S->children()) { if (C) { if (auto *OED = dyn_cast(C)) { - // Check implicitly captured vriables in the task-based directives to + // Check implicitly captured variables in the task-based directives to // check if they must be firstprivatized. - if (!OED->hasAssociatedStmt()) - continue; - const Stmt *AS = OED->getAssociatedStmt(); - if (!AS) - continue; - for (const CapturedStmt::Capture &Cap : - cast(AS)->captures()) { - if (Cap.capturesVariable()) { - DeclRefExpr *DRE = buildDeclRefExpr( - SemaRef, Cap.getCapturedVar(), - Cap.getCapturedVar()->getType().getNonLValueExprType( - SemaRef.Context), - Cap.getLocation(), - /*RefersToCapture=*/true); - Visit(DRE); - } - } + VisitSubCaptures(OED); } else { Visit(C); } @@ -4522,6 +4579,15 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) { } } DSAStack->addLoopControlVariable(D, VD); + const Decl *LD = DSAStack->getPossiblyLoopCunter(); + if (LD != D->getCanonicalDecl()) { + DSAStack->resetPossibleLoopCounter(); + if (auto *Var = dyn_cast_or_null(LD)) + MarkDeclarationsReferencedInExpr( + buildDeclRefExpr(*this, const_cast(Var), + Var->getType().getNonLValueExprType(Context), + ForLoc, /*RefersToCapture=*/true)); + } } } DSAStack->setAssociatedLoops(AssociatedLoops - 1); diff --git a/clang/test/OpenMP/debug-info-openmp-array.cpp b/clang/test/OpenMP/debug-info-openmp-array.cpp index e6de476..1db79e6 100644 --- a/clang/test/OpenMP/debug-info-openmp-array.cpp +++ b/clang/test/OpenMP/debug-info-openmp-array.cpp @@ -13,4 +13,4 @@ void f(int m) { } } -// CHECK: !DILocalVariable(name: "cen", arg: 6 +// CHECK: !DILocalVariable(name: "cen", arg: 5 diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp index 0f70728..1036b61 100644 --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -267,8 +267,8 @@ void test_auto(float *a, float *b, float *c, float *d) { unsigned int x = 0; unsigned int y = 0; #pragma omp parallel for schedule(auto) collapse(2) -// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), -// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) +// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), +// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) // CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]], // CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID:%.+]], i32 38, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1) // @@ -311,8 +311,8 @@ void test_auto(float *a, float *b, float *c, float *d) { void runtime(float *a, float *b, float *c, float *d) { int x = 0; #pragma omp parallel for collapse(2) schedule(runtime) -// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), -// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) +// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), +// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) // CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]], // CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID:%.+]], i32 37, i32 0, i32 199, i32 1, i32 1) // diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index 26033e7..6471f57 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -49,7 +49,7 @@ int main(int argc, char **argv) { for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) -// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 // CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0 diff --git a/clang/test/OpenMP/taskloop_firstprivate_messages.cpp b/clang/test/OpenMP/taskloop_firstprivate_messages.cpp index 7e1b818..a34d9bc 100644 --- a/clang/test/OpenMP/taskloop_firstprivate_messages.cpp +++ b/clang/test/OpenMP/taskloop_firstprivate_messages.cpp @@ -297,9 +297,12 @@ int main(int argc, char **argv) { #pragma omp taskloop firstprivate(i) // expected-note {{defined as firstprivate}} for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp taskloop' directive may not be firstprivate, predetermined as private}} foo(); -#pragma omp parallel reduction(+ : i) // expected-note 2 {{defined as reduction}} +#pragma omp parallel reduction(+ : i) // expected-note {{defined as reduction}} #pragma omp taskloop firstprivate(i) //expected-error {{argument of a reduction clause of a parallel construct must not appear in a firstprivate clause on a task construct}} - for (i = 0; i < argc; ++i) // expected-error {{reduction variables may not be accessed in an explicit task}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp taskloop firstprivate(i) //expected-note {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp taskloop' directive may not be firstprivate, predetermined as private}} foo(); #pragma omp parallel #pragma omp taskloop firstprivate(B::x) // expected-error {{threadprivate or thread local variable cannot be firstprivate}} diff --git a/clang/test/OpenMP/taskloop_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_reduction_codegen.cpp index a9f79a5..0eff06d8 100644 --- a/clang/test/OpenMP/taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_reduction_codegen.cpp @@ -147,7 +147,7 @@ sum = 0.0; // CHECK: [[DIV:%.*]] = sdiv i32 [[ADD11]], 1 // CHECK: [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK: store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], -// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 72, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) +// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 64, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) // CHECK: call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* diff --git a/clang/test/OpenMP/taskloop_simd_firstprivate_messages.cpp b/clang/test/OpenMP/taskloop_simd_firstprivate_messages.cpp index 66c9e7a..3fb5ad1 100644 --- a/clang/test/OpenMP/taskloop_simd_firstprivate_messages.cpp +++ b/clang/test/OpenMP/taskloop_simd_firstprivate_messages.cpp @@ -301,6 +301,9 @@ int main(int argc, char **argv) { #pragma omp taskloop simd firstprivate(i) // expected-error {{argument of a reduction clause of a parallel construct must not appear in a firstprivate clause on a task construct}} for (i = 0; i < argc; ++i) // expected-error {{reduction variables may not be accessed in an explicit task}} foo(); +#pragma omp taskloop simd firstprivate(i) //expected-note {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp taskloop simd' directive may not be firstprivate, predetermined as linear}} + foo(); #pragma omp parallel #pragma omp taskloop simd firstprivate(B::x) // expected-error {{threadprivate or thread local variable cannot be firstprivate}} for (i = 0; i < argc; ++i) -- 2.7.4