From 8cbe0a6b626eb7e158f339c20f4af74996d9d4ac Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 26 Feb 2015 10:27:34 +0000 Subject: [PATCH] [OPENMP] Fixed codegen for directives without function outlining. Fixed crash on codegen for directives like 'omp for', 'omp single' etc. inside of the 'omp parallel', 'omp task' etc. regions. llvm-svn: 230621 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 167 ++++++++++++++++++++++++--------- clang/lib/CodeGen/CGOpenMPRuntime.h | 13 ++- clang/lib/CodeGen/CGStmtOpenMP.cpp | 39 +------- clang/lib/CodeGen/CodeGenFunction.h | 10 +- clang/test/OpenMP/critical_codegen.cpp | 9 ++ clang/test/OpenMP/for_codegen.cpp | 8 ++ clang/test/OpenMP/master_codegen.cpp | 9 ++ clang/test/OpenMP/simd_codegen.cpp | 8 ++ clang/test/OpenMP/single_codegen.cpp | 9 ++ 9 files changed, 185 insertions(+), 87 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8780bdd..51865a6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -27,30 +27,46 @@ using namespace clang; using namespace CodeGen; namespace { -/// \brief API for captured statement code generation in OpenMP constructs. +/// \brief Base class for handling code generation inside OpenMP regions. class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { public: - CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS, - const VarDecl *ThreadIDVar) - : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar), - Directive(D) { - assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); - } + CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS) + : CGCapturedStmtInfo(CS, CR_OpenMP), Directive(D) {} - /// \brief Gets a variable or parameter for storing global thread id + CGOpenMPRegionInfo(const OMPExecutableDirective &D) + : CGCapturedStmtInfo(CR_OpenMP), Directive(D) {} + + /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. - const VarDecl *getThreadIDVariable() const { return ThreadIDVar; } + virtual const VarDecl *getThreadIDVariable() const = 0; - /// \brief Gets an LValue for the current ThreadID variable. + /// \brief Get an LValue for the current ThreadID variable. LValue getThreadIDVariableLValue(CodeGenFunction &CGF); + /// \brief Emit the captured statement body. + virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; + static bool classof(const CGCapturedStmtInfo *Info) { return Info->getKind() == CR_OpenMP; } +protected: + /// \brief OpenMP executable directive associated with the region. + const OMPExecutableDirective &Directive; +}; - /// \brief Emit the captured statement body. - void EmitBody(CodeGenFunction &CGF, Stmt *S) override; - +/// \brief API for captured statement code generation in OpenMP constructs. +class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { +public: + CGOpenMPOutlinedRegionInfo(const OMPExecutableDirective &D, + const CapturedStmt &CS, const VarDecl *ThreadIDVar) + : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar) { + assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); + } + /// \brief Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + virtual const VarDecl *getThreadIDVariable() const override { + return ThreadIDVar; + } /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } @@ -58,18 +74,62 @@ private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; - /// \brief OpenMP executable directive associated with the region. - const OMPExecutableDirective &Directive; +}; + +/// \brief API for inlined captured statement code generation in OpenMP +/// constructs. +class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { +public: + CGOpenMPInlinedRegionInfo(const OMPExecutableDirective &D, + CodeGenFunction::CGCapturedStmtInfo *OldCSI) + : CGOpenMPRegionInfo(D), OldCSI(OldCSI), + OuterRegionInfo(dyn_cast_or_null(OldCSI)) {} + // \brief Retrieve the value of the context parameter. + virtual llvm::Value *getContextValue() const override { + if (OuterRegionInfo) + return OuterRegionInfo->getContextValue(); + llvm_unreachable("No context value for inlined OpenMP region"); + } + /// \brief Lookup the captured field decl for a variable. + virtual const FieldDecl *lookup(const VarDecl *VD) const override { + if (OuterRegionInfo) + return OuterRegionInfo->lookup(VD); + llvm_unreachable("Trying to reference VarDecl that is neither local nor " + "captured in outer OpenMP region"); + } + virtual FieldDecl *getThisFieldDecl() const override { + if (OuterRegionInfo) + return OuterRegionInfo->getThisFieldDecl(); + return nullptr; + } + /// \brief Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + virtual const VarDecl *getThreadIDVariable() const override { + if (OuterRegionInfo) + return OuterRegionInfo->getThreadIDVariable(); + return nullptr; + } + /// \brief Get the name of the capture helper. + virtual StringRef getHelperName() const override { + llvm_unreachable("No helper name for inlined OpenMP construct"); + } + + CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } + +private: + /// \brief CodeGen info about outer OpenMP region. + CodeGenFunction::CGCapturedStmtInfo *OldCSI; + CGOpenMPRegionInfo *OuterRegionInfo; }; } // namespace LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.MakeNaturalAlignAddrLValue( - CGF.GetAddrOfLocalVar(ThreadIDVar), - CGF.getContext().getPointerType(ThreadIDVar->getType())); + CGF.GetAddrOfLocalVar(getThreadIDVariable()), + CGF.getContext().getPointerType(getThreadIDVariable()->getType())); } -void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) { +void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPPrivateClause(Directive, PrivateScope); CGF.EmitOMPFirstprivateClause(Directive, PrivateScope); @@ -98,7 +158,7 @@ CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar) { const CapturedStmt *CS = cast(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); - CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar); + CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar); CGF.CapturedStmtInfo = &CGInfo; return CGF.GenerateCapturedStmtFunction(*CS); } @@ -205,32 +265,34 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, } if (auto OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) { - // Check if this an outlined function with thread id passed as argument. - auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable(); - auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); - auto RVal = CGF.EmitLoadOfLValue(LVal, Loc); - LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(), - ThreadIDVar->getType()); - ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); - // If value loaded in entry block, cache it and use it everywhere in - // function. - if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { - auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); - Elem.second.ThreadID = ThreadID; + if (auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable()) { + // Check if this an outlined function with thread id passed as argument. + auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + auto RVal = CGF.EmitLoadOfLValue(LVal, Loc); + LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(), + ThreadIDVar->getType()); + ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); + // If value loaded in entry block, cache it and use it everywhere in + // function. + if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + Elem.second.ThreadID = ThreadID; + } + return ThreadID; } - } else { - // This is not an outlined function region - need to call __kmpc_int32 - // kmpc_global_thread_num(ident_t *loc). - // Generate thread id value and cache this value for use across the - // function. - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); - ThreadID = CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_global_thread_num), - emitUpdateLocation(CGF, Loc)); - auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); - Elem.second.ThreadID = ThreadID; } + + // This is not an outlined function region - need to call __kmpc_int32 + // kmpc_global_thread_num(ident_t *loc). + // Generate thread id value and cache this value for use across the + // function. + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); + CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); + ThreadID = + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), + emitUpdateLocation(CGF, Loc)); + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + Elem.second.ThreadID = ThreadID; return ThreadID; } @@ -703,8 +765,10 @@ llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc) { if (auto OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) - return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF), - SourceLocation()).getScalarVal(); + if (OMPRegionInfo->getThreadIDVariable()) + return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF), + Loc).getScalarVal(); + auto ThreadID = getThreadID(CGF, Loc); auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); @@ -979,3 +1043,16 @@ void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef, emitUpdateLocation(CGF, Loc)); } +InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII( + CodeGenFunction &CGF, const OMPExecutableDirective &D) + : CGF(CGF) { + CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(D, CGF.CapturedStmtInfo); +} + +InlinedOpenMPRegionRAII::~InlinedOpenMPRegionRAII() { + auto *OldCSI = + cast(CGF.CapturedStmtInfo)->getOldCSI(); + delete CGF.CapturedStmtInfo; + CGF.CapturedStmtInfo = OldCSI; +} + diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index b7c92f0..97aa5b8 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -43,9 +43,6 @@ class CodeGenFunction; class CodeGenModule; class CGOpenMPRuntime { -public: - -private: enum OpenMPRTLFunction { /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, /// kmpc_micro microtask, ...); @@ -416,6 +413,16 @@ public: virtual void emitFlush(CodeGenFunction &CGF, ArrayRef Vars, SourceLocation Loc); }; + +/// \brief RAII for emitting code of CapturedStmt without function outlining. +class InlinedOpenMPRegionRAII { + CodeGenFunction &CGF; + +public: + InlinedOpenMPRegionRAII(CodeGenFunction &CGF, + const OMPExecutableDirective &D); + ~InlinedOpenMPRegionRAII(); +}; } // namespace CodeGen } // namespace clang diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 00451d0..daf5fcc 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -20,35 +20,6 @@ using namespace clang; using namespace CodeGen; -namespace { -/// \brief RAII for emitting code of CapturedStmt without function outlining. -class InlinedOpenMPRegion { - CodeGenFunction &CGF; - CodeGenFunction::CGCapturedStmtInfo *PrevCapturedStmtInfo; - const Decl *StoredCurCodeDecl; - - /// \brief A class to emit CapturedStmt construct as inlined statement without - /// generating a function for outlined code. - class CGInlinedOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { - public: - CGInlinedOpenMPRegionInfo() : CGCapturedStmtInfo() {} - }; - -public: - InlinedOpenMPRegion(CodeGenFunction &CGF, const Stmt *S) - : CGF(CGF), PrevCapturedStmtInfo(CGF.CapturedStmtInfo), - StoredCurCodeDecl(CGF.CurCodeDecl) { - CGF.CurCodeDecl = cast(S)->getCapturedDecl(); - CGF.CapturedStmtInfo = new CGInlinedOpenMPRegionInfo(); - } - ~InlinedOpenMPRegion() { - delete CGF.CapturedStmtInfo; - CGF.CapturedStmtInfo = PrevCapturedStmtInfo; - CGF.CurCodeDecl = StoredCurCodeDecl; - } -}; -} // namespace - //===----------------------------------------------------------------------===// // OpenMP Directive Emission //===----------------------------------------------------------------------===// @@ -446,7 +417,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { } } - InlinedOpenMPRegion Region(*this, S.getAssociatedStmt()); + InlinedOpenMPRegionRAII Region(*this, S); RunCleanupsScope DirectiveScope(*this); CGDebugInfo *DI = getDebugInfo(); @@ -679,7 +650,7 @@ void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { } void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { - InlinedOpenMPRegion Region(*this, S.getAssociatedStmt()); + InlinedOpenMPRegionRAII Region(*this, S); RunCleanupsScope DirectiveScope(*this); CGDebugInfo *DI = getDebugInfo(); @@ -709,7 +680,7 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) { void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void { - InlinedOpenMPRegion Region(*this, S.getAssociatedStmt()); + InlinedOpenMPRegionRAII Region(*this, S); RunCleanupsScope Scope(*this); EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); EnsureInsertPoint(); @@ -718,7 +689,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void { - InlinedOpenMPRegion Region(*this, S.getAssociatedStmt()); + InlinedOpenMPRegionRAII Region(*this, S); RunCleanupsScope Scope(*this); EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); EnsureInsertPoint(); @@ -728,7 +699,7 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { CGM.getOpenMPRuntime().emitCriticalRegion( *this, S.getDirectiveName().getAsString(), [&]() -> void { - InlinedOpenMPRegion Region(*this, S.getAssociatedStmt()); + InlinedOpenMPRegionRAII Region(*this, S); RunCleanupsScope Scope(*this); EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); EnsureInsertPoint(); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 7571332..c0368aa 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -194,22 +194,22 @@ public: void setContextValue(llvm::Value *V) { ThisValue = V; } // \brief Retrieve the value of the context parameter. - llvm::Value *getContextValue() const { return ThisValue; } + virtual llvm::Value *getContextValue() const { return ThisValue; } /// \brief Lookup the captured field decl for a variable. - const FieldDecl *lookup(const VarDecl *VD) const { + virtual const FieldDecl *lookup(const VarDecl *VD) const { return CaptureFields.lookup(VD); } - bool isCXXThisExprCaptured() const { return CXXThisFieldDecl != nullptr; } - FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; } + bool isCXXThisExprCaptured() const { return getThisFieldDecl() != nullptr; } + virtual FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; } static bool classof(const CGCapturedStmtInfo *) { return true; } /// \brief Emit the captured statement body. - virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) { + virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) { RegionCounter Cnt = CGF.getPGORegionCounter(S); Cnt.beginRegion(CGF.Builder); CGF.EmitStmt(S); diff --git a/clang/test/OpenMP/critical_codegen.cpp b/clang/test/OpenMP/critical_codegen.cpp index dda532c..37a062d 100644 --- a/clang/test/OpenMP/critical_codegen.cpp +++ b/clang/test/OpenMP/critical_codegen.cpp @@ -35,4 +35,13 @@ int main() { return a; } +// CHECK-LABEL: parallel_critical +void parallel_critical(float *a) { +#pragma omp parallel +#pragma omp critical + // CHECK-NOT: __kmpc_global_thread_num + for (unsigned i = 131071; i <= 2147483647; i += 127) + a[i] += i; +} + #endif diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp index badc5bd..3193d84 100644 --- a/clang/test/OpenMP/for_codegen.cpp +++ b/clang/test/OpenMP/for_codegen.cpp @@ -146,5 +146,13 @@ void static_chunked(float *a, float *b, float *c, float *d) { // CHECK: ret void } +void parallel_for(float *a) { +#pragma omp parallel +#pragma omp for schedule(static, 5) + // CHECK-NOT: __kmpc_global_thread_num + for (unsigned i = 131071; i <= 2147483647; i += 127) + a[i] += i; +} + #endif // HEADER diff --git a/clang/test/OpenMP/master_codegen.cpp b/clang/test/OpenMP/master_codegen.cpp index d354bae..38eaa33 100644 --- a/clang/test/OpenMP/master_codegen.cpp +++ b/clang/test/OpenMP/master_codegen.cpp @@ -43,4 +43,13 @@ int main() { return a; } +// CHECK-LABEL: parallel_master +void parallel_master(float *a) { +#pragma omp parallel +#pragma omp master + // CHECK-NOT: __kmpc_global_thread_num + for (unsigned i = 131071; i <= 2147483647; i += 127) + a[i] += i; +} + #endif diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp index b8073c29..fac1427 100644 --- a/clang/test/OpenMP/simd_codegen.cpp +++ b/clang/test/OpenMP/simd_codegen.cpp @@ -403,5 +403,13 @@ void widened(float *a, float *b, float *c, float *d) { // CHECK: ret void } +void parallel_simd(float *a) { +#pragma omp parallel +#pragma omp simd + // CHECK-NOT: __kmpc_global_thread_num + for (unsigned i = 131071; i <= 2147483647; i += 127) + a[i] += i; +} + #endif // HEADER diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp index e67af0f..b98da37 100644 --- a/clang/test/OpenMP/single_codegen.cpp +++ b/clang/test/OpenMP/single_codegen.cpp @@ -43,4 +43,13 @@ int main() { return a; } +// CHECK-LABEL: parallel_single +void parallel_single(float *a) { +#pragma omp parallel +#pragma omp single + // CHECK-NOT: __kmpc_global_thread_num + for (unsigned i = 131071; i <= 2147483647; i += 127) + a[i] += i; +} + #endif -- 2.7.4