From ec9563c54ed25e9f9cbe60985399212d50bd801d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 21 Jul 2020 15:20:16 -0400 Subject: [PATCH] [OPENMP]Fix PR37671: Privatize local(private) variables in untied tasks. Summary: In untied tasks, we need to allocate the space for local variables declared in the task region when the memory for the task data is allocated. The function can be interrupted, and we can exit from the function at an untied task switch, so we need to keep the state of the local variables in this case. Also, the compiler should not call cleanups when exiting at an untied task switch until the real exit from the declaration scope is reached during execution. Reviewers: jdoerfert Subscribers: yaxunl, guansong, cfe-commits, sstefan1, caomhin Tags: #clang Differential Revision: https://reviews.llvm.org/D84457 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 183 +++++++++++++++++++++------------- clang/lib/CodeGen/CGOpenMPRuntime.h | 18 ++++ clang/lib/CodeGen/CGStmtOpenMP.cpp | 65 +++++++++++- clang/test/OpenMP/task_codegen.cpp | 34 ++++++- 4 files changed, 224 insertions(+), 76 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 0582de5..addf6c3 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -180,7 +180,7 @@ public: UntiedCodeGen(CGF); CodeGenFunction::JumpDest CurPoint = CGF.getJumpDestInCurrentScope(".untied.next."); - CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBranch(CGF.ReturnBlock.getBlock()); CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), CGF.Builder.GetInsertBlock()); @@ -3370,6 +3370,7 @@ struct PrivateHelpersTy { const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), PrivateElemInit(PrivateElemInit) {} + PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} const Expr *OriginalRef 
= nullptr; const VarDecl *Original = nullptr; const VarDecl *PrivateCopy = nullptr; @@ -3390,6 +3391,10 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef Privates) { for (const auto &Pair : Privates) { const VarDecl *VD = Pair.second.Original; QualType Type = VD->getType().getNonReferenceType(); + // If the private variable is a local variable with lvalue ref type, + // allocate the pointer instead of the pointee type. + if (!Pair.second.OriginalRef && VD->getType()->isLValueReferenceType()) + Type = C.getPointerType(Type); FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); if (VD->hasAttrs()) { for (specific_attr_iterator I(VD->getAttrs().begin()), @@ -3643,10 +3648,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, /// \endcode static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, - ArrayRef PrivateVars, - ArrayRef FirstprivateVars, - ArrayRef LastprivateVars, - QualType PrivatesQTy, + const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef Privates) { ASTContext &C = CGM.getContext(); FunctionArgList Args; @@ -3655,9 +3657,9 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, C.getPointerType(PrivatesQTy).withConst().withRestrict(), ImplicitParamDecl::Other); Args.push_back(&TaskPrivatesArg); - llvm::DenseMap PrivateVarsPos; + llvm::DenseMap, unsigned> PrivateVarsPos; unsigned Counter = 1; - for (const Expr *E : PrivateVars) { + for (const Expr *E : Data.PrivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3668,7 +3670,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, PrivateVarsPos[VD] = Counter; ++Counter; } - for (const Expr *E : FirstprivateVars) { + for (const Expr *E : Data.FirstprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3679,7 +3681,7 @@ 
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, PrivateVarsPos[VD] = Counter; ++Counter; } - for (const Expr *E : LastprivateVars) { + for (const Expr *E : Data.LastprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3690,6 +3692,17 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, PrivateVarsPos[VD] = Counter; ++Counter; } + for (const VarDecl *VD : Data.PrivateLocals) { + QualType Ty = VD->getType().getNonReferenceType(); + if (VD->getType()->isLValueReferenceType()) + Ty = C.getPointerType(Ty); + Args.push_back(ImplicitParamDecl::Create( + C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), + ImplicitParamDecl::Other)); + PrivateVarsPos[VD] = Counter; + ++Counter; + } const auto &TaskPrivatesMapFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); llvm::FunctionType *TaskPrivatesMapTy = @@ -3945,16 +3958,16 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, /// Checks if destructor function is required to be generated. /// \return true if cleanups are required, false otherwise. 
static bool -checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { - bool NeedsCleanup = false; - auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); - const auto *PrivateRD = cast(FI->getType()->getAsTagDecl()); - for (const FieldDecl *FD : PrivateRD->fields()) { - NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); - if (NeedsCleanup) - break; +checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, + ArrayRef Privates) { + for (const PrivateDataTy &P : Privates) { + if (!P.second.OriginalRef) + continue; + QualType Ty = P.second.Original->getType().getNonReferenceType(); + if (Ty.isDestructedType()) + return true; } - return NeedsCleanup; + return false; } namespace { @@ -4124,9 +4137,12 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, /*PrivateElemInit=*/nullptr)); ++I; } - llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { - return L.first > R.first; - }); + for (const VarDecl *VD : Data.PrivateLocals) + Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); + llvm::stable_sort(Privates, + [](const PrivateDataTy &L, const PrivateDataTy &R) { + return L.first > R.first; + }); QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). 
emitKmpRoutineEntryT(KmpInt32Ty); @@ -4168,9 +4184,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, std::next(TaskFunction->arg_begin(), 3)->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); - TaskPrivatesMap = emitTaskPrivateMappingFunction( - CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, - FI->getType(), Privates); + TaskPrivatesMap = + emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskPrivatesMap, TaskPrivatesMapTy); } else { @@ -4200,7 +4215,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags = Data.Tied ? TiedFlag : 0; bool NeedsCleanup = false; if (!Privates.empty()) { - NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); + NeedsCleanup = + checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); if (NeedsCleanup) Flags = Flags | DestructorsFlag; } @@ -11223,56 +11239,64 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, if (!VD) return Address::invalid(); const VarDecl *CVD = VD->getCanonicalDecl(); - if (!CVD->hasAttr()) + if (CVD->hasAttr()) { + const auto *AA = CVD->getAttr(); + // Use the default allocation. 
+ if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || + AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && + !AA->getAllocator()) + return Address::invalid(); + llvm::Value *Size; + CharUnits Align = CGM.getContext().getDeclAlign(CVD); + if (CVD->getType()->isVariablyModifiedType()) { + Size = CGF.getTypeSize(CVD->getType()); + // Align the size: ((size + align - 1) / align) * align + Size = CGF.Builder.CreateNUWAdd( + Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); + Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); + Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); + } else { + CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); + Size = CGM.getSize(Sz.alignTo(Align)); + } + llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); + assert(AA->getAllocator() && + "Expected allocator expression for non-default allocator."); + llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); + // According to the standard, the original allocator type is a enum + // (integer). Convert to pointer type, if required. 
+ if (Allocator->getType()->isIntegerTy()) + Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); + else if (Allocator->getType()->isPointerTy()) + Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Allocator, CGM.VoidPtrTy); + llvm::Value *Args[] = {ThreadID, Size, Allocator}; + + llvm::Value *Addr = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc), + Args, getName({CVD->getName(), ".void.addr"})); + llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, + Allocator}; + llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_free); + + CGF.EHStack.pushCleanup(NormalAndEHCleanup, FiniRTLFn, + llvm::makeArrayRef(FiniArgs)); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, + CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), + getName({CVD->getName(), ".addr"})); + return Address(Addr, Align); + } + if (UntiedLocalVarsStack.empty()) return Address::invalid(); - const auto *AA = CVD->getAttr(); - // Use the default allocation. 
- if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || - AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && - !AA->getAllocator()) + const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back(); + auto It = UntiedData.find(VD); + if (It == UntiedData.end()) return Address::invalid(); - llvm::Value *Size; - CharUnits Align = CGM.getContext().getDeclAlign(CVD); - if (CVD->getType()->isVariablyModifiedType()) { - Size = CGF.getTypeSize(CVD->getType()); - // Align the size: ((size + align - 1) / align) * align - Size = CGF.Builder.CreateNUWAdd( - Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); - Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); - Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); - } else { - CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); - Size = CGM.getSize(Sz.alignTo(Align)); - } - llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); - assert(AA->getAllocator() && - "Expected allocator expression for non-default allocator."); - llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); - // According to the standard, the original allocator type is a enum (integer). - // Convert to pointer type, if required. 
- if (Allocator->getType()->isIntegerTy()) - Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); - else if (Allocator->getType()->isPointerTy()) - Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, - CGM.VoidPtrTy); - llvm::Value *Args[] = {ThreadID, Size, Allocator}; - - llvm::Value *Addr = - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_alloc), - Args, getName({CVD->getName(), ".void.addr"})); - llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, - Allocator}; - llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_free); - CGF.EHStack.pushCleanup(NormalAndEHCleanup, FiniRTLFn, - llvm::makeArrayRef(FiniArgs)); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Addr, - CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), - getName({CVD->getName(), ".addr"})); - return Address(Addr, Align); + return It->second; } CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( @@ -11307,6 +11331,21 @@ CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); } +CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( + CodeGenModule &CGM, + const llvm::DenseMap, Address> &LocalVars) + : CGM(CGM), NeedToPush(!LocalVars.empty()) { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); +} + +CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); +} + bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 59541f4..178acae 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h 
@@ -105,6 +105,7 @@ struct OMPTaskDataTy final { SmallVector ReductionOrigs; SmallVector ReductionCopies; SmallVector ReductionOps; + SmallVector, 4> PrivateLocals; struct DependData { OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown; const Expr *IteratorExpr = nullptr; @@ -245,6 +246,19 @@ public: ~NontemporalDeclsRAII(); }; + /// Manages the stack of local variable addresses for untied task regions. + class UntiedTaskLocalDeclsRAII { + CodeGenModule &CGM; + const bool NeedToPush; + + public: + UntiedTaskLocalDeclsRAII( + CodeGenModule &CGM, + const llvm::DenseMap, Address> + &LocalVars); + ~UntiedTaskLocalDeclsRAII(); + }; + /// Maps the expression for the lastprivate variable to the global copy used /// to store new value because original variables are not mapped in inner /// parallel regions. Only private copies are captured but we need also to @@ -705,6 +719,10 @@ private: /// The set is the union of all current stack elements. llvm::SmallVector NontemporalDeclsStack; + using UntiedLocalVarsAddressesMap = + llvm::DenseMap, Address>; + llvm::SmallVector UntiedLocalVarsStack; + /// Stack for list of addresses of declarations in current context marked as /// lastprivate conditional. The set is the union of all current stack /// elements. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 7a49fe0..c1def6c 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -21,6 +21,7 @@ #include "clang/AST/OpenMPClause.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/PrettyStackTrace.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" @@ -3784,6 +3785,42 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( checkForLastprivateConditionalUpdate(*this, S); } +namespace { +/// Get the list of variables declared in the context of the untied tasks. 
+class CheckVarsEscapingUntiedTaskDeclContext final + : public ConstStmtVisitor { + llvm::SmallVector PrivateDecls; + +public: + explicit CheckVarsEscapingUntiedTaskDeclContext() = default; + virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; + void VisitDeclStmt(const DeclStmt *S) { + if (!S) + return; + // Need to privatize only local vars, static locals can be processed as is. + for (const Decl *D : S->decls()) { + if (const auto *VD = dyn_cast_or_null(D)) + if (VD->hasLocalStorage()) + PrivateDecls.push_back(VD); + } + } + void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; } + void VisitCapturedStmt(const CapturedStmt *) { return; } + void VisitLambdaExpr(const LambdaExpr *) { return; } + void VisitBlockExpr(const BlockExpr *) { return; } + void VisitStmt(const Stmt *S) { + if (!S) + return; + for (const Stmt *Child : S->children()) + if (Child) + Visit(Child); + } + + /// Returns the list of collected local variable declarations. + ArrayRef getPrivateDecls() const { return PrivateDecls; } +}; +} // anonymous namespace + void CodeGenFunction::EmitOMPTaskBasedDirective( const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, @@ -3884,14 +3921,22 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); } + // Get list of local vars for untied tasks. + if (!Data.Tied) { + CheckVarsEscapingUntiedTaskDeclContext Checker; + Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); + Data.PrivateLocals.append(Checker.getPrivateDecls().begin(), + Checker.getPrivateDecls().end()); + } auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, CapturedRegion](CodeGenFunction &CGF, PrePostActionTy &Action) { + llvm::DenseMap, Address> UntiedLocalVars; // Set proper addresses for generated private copies. 
OMPPrivateScope Scope(CGF); llvm::SmallVector, 16> FirstprivatePtrs; if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || - !Data.LastprivateVars.empty()) { + !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); enum { PrivatesParam = 2, CopyFnParam = 3 }; @@ -3927,6 +3972,15 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } + for (const VarDecl *VD : Data.PrivateLocals) { + QualType Ty = VD->getType().getNonReferenceType(); + if (VD->getType()->isLValueReferenceType()) + Ty = CGF.getContext().getPointerType(Ty); + Address PrivatePtr = CGF.CreateMemTemp( + CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); + UntiedLocalVars.try_emplace(VD, PrivatePtr); + CallArgs.push_back(PrivatePtr.getPointer()); + } CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); for (const auto &Pair : LastprivateDstsOrigs) { @@ -3945,6 +3999,13 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( CGF.getContext().getDeclAlign(Pair.first)); Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } + // Adjust mapping for internal locals by mapping actual memory instead of + // a pointer to this memory. 
+ for (auto &Pair : UntiedLocalVars) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Pair.getSecond() = Replacement; + } } if (Data.Reductions) { OMPPrivateScope FirstprivateScope(CGF); @@ -4039,6 +4100,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( } (void)InRedScope.Privatize(); + CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF.CGM, + UntiedLocalVars); Action.Enter(CGF); BodyGen(CGF); }; diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index 9226234..3c92ca7 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // @@ -258,7 +258,7 @@ int main() { a = 4; c = 5; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) #pragma omp task untied { @@ -295,26 +295,54 @@ int main() { // CHECK: store i32 4, i32* [[A_PTR]] // CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) -// CHECK: switch i32 %{{.+}}, label +// UNTIEDRT: 
[[S1_ADDR_PTR:%.+]] = alloca %struct.S*, +// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]]) +// UNTIEDRT: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]], +// CHECK: switch i32 %{{.+}}, label %[[DONE:.+]] [ + +// CHECK: [[DONE]]: +// CHECK: br label %[[CLEANUP:[^,]+]] + // CHECK: load i32*, i32** % // CHECK: store i32 1, i32* % // CHECK: call i32 @__kmpc_omp_task(% +// UNTIEDRT: br label %[[EXIT:[^,]+]] +// UNTIEDRT: call void [[CONSTR:@.+]](%struct.S* [[S1_ADDR]]) // CHECK: call i8* @__kmpc_omp_task_alloc( // CHECK: call i32 @__kmpc_omp_task(% // CHECK: load i32*, i32** % // CHECK: store i32 2, i32* % // CHECK: call i32 @__kmpc_omp_task(% +// UNTIEDRT: br label %[[EXIT]] // CHECK: call i32 @__kmpc_omp_taskyield(% // CHECK: load i32*, i32** % // CHECK: store i32 3, i32* % // CHECK: call i32 @__kmpc_omp_task(% +// UNTIEDRT: br label %[[EXIT]] + +// s1 = S(); +// UNTIEDRT: call void [[CONSTR]](%struct.S* [[TMP:%.+]]) +// UNTIEDRT: [[DST:%.+]] = bitcast %struct.S* [[S1_ADDR]] to i8* +// UNTIEDRT: [[SRC:%.+]] = bitcast %struct.S* [[TMP]] to i8* +// UNTIEDRT: call void @llvm.memcpy.{{.+}}(i8* {{.*}}[[DST]], i8* {{.*}}[[SRC]], i64 4, i1 false) +// UNTIEDRT: call void [[DESTR:@.+]](%struct.S* [[TMP]]) // CHECK: call i32 @__kmpc_omp_taskwait(% // CHECK: load i32*, i32** % // CHECK: store i32 4, i32* % // CHECK: call i32 @__kmpc_omp_task(% +// UNTIEDRT: br label %[[EXIT]] + +// UNTIEDRT: call void [[DESTR]](%struct.S* [[S1_ADDR]]) +// CHECK: br label %[[CLEANUP]] + +// CHECK: [[CLEANUP]]: +// UNTIEDRT: br label %[[EXIT]] + +// UNTIEDRT: [[EXIT]]: +// UNTIEDRT-NEXT: ret i32 0 struct S1 { int a; -- 2.7.4