From: Alexey Bataev Date: Tue, 19 Apr 2016 16:27:55 +0000 (+0000) Subject: [OPENMP] Codegen for untied tasks. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=823acfacdfef94ebc5f97e913bfbdac535a0d2bc;p=platform%2Fupstream%2Fllvm.git [OPENMP] Codegen for untied tasks. If the untied clause is present on a task construct, any thread in the team can resume the task region after a suspension. Patch adds proper codegen for untied tasks. llvm-svn: 266754 --- diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 7d12c36..828be92 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -72,6 +72,8 @@ public: /// \return LValue for thread id variable. This LValue always has type int32*. virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); + virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} + CGOpenMPRegionKind getRegionKind() const { return RegionKind; } OpenMPDirectiveKind getDirectiveKind() const { return Kind; } @@ -82,6 +84,8 @@ public: return Info->getKind() == CR_OpenMP; } + ~CGOpenMPRegionInfo() override = default; + protected: CGOpenMPRegionKind RegionKind; RegionCodeGenTy CodeGen; @@ -90,7 +94,7 @@ protected: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, @@ -121,14 +125,62 @@ private: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: + class UntiedTaskActionTy final : public PrePostActionTy { + bool Untied; + const VarDecl *PartIDVar; + const RegionCodeGenTy &UntiedCodeGen; + llvm::SwitchInst *UntiedSwitch = nullptr; + + public: + UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, + const RegionCodeGenTy &UntiedCodeGen) + : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} + void Enter(CodeGenFunction &CGF) override { + if (Untied) { + // Emit task switching point. + auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs()); + auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); + auto *DoneBB = CGF.createBasicBlock(".untied.done."); + UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); + CGF.EmitBlock(DoneBB); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(0), + CGF.Builder.GetInsertBlock()); + emitUntiedSwitch(CGF); + } + } + void emitUntiedSwitch(CodeGenFunction &CGF) const { + if (Untied) { + auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs()); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + PartIdLVal); + UntiedCodeGen(CGF); + CodeGenFunction::JumpDest CurPoint = + CGF.getJumpDestInCurrentScope(".untied.next."); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + CGF.Builder.GetInsertBlock()); + CGF.EmitBranchThroughCleanup(CurPoint); + CGF.EmitBlock(CurPoint.getBlock()); + } + } + unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } + }; CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) + OpenMPDirectiveKind Kind, bool HasCancel, + const UntiedTaskActionTy &Action) : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), - ThreadIDVar(ThreadIDVar) { + ThreadIDVar(ThreadIDVar), Action(Action) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } @@ -142,6 +194,10 @@ public: /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } + void emitUntiedSwitch(CodeGenFunction &CGF) override { + Action.emitUntiedSwitch(CGF); + } + static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast(Info)->getRegionKind() == @@ -152,6 +208,8 @@ private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; + /// Action for emitting code for untied tasks. + const UntiedTaskActionTy &Action; }; /// \brief API for inlined captured statement code generation in OpenMP @@ -210,6 +268,11 @@ public: llvm_unreachable("No helper name for inlined OpenMP construct"); } + void emitUntiedSwitch(CodeGenFunction &CGF) override { + if (OuterRegionInfo) + OuterRegionInfo->emitUntiedSwitch(CGF); + } + CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } static bool classof(const CGCapturedStmtInfo *Info) { @@ -217,6 +280,8 @@ public: cast(Info)->getRegionKind() == InlinedRegion; } + ~CGOpenMPInlinedRegionInfo() override = default; + private: /// \brief CodeGen info about outer OpenMP region. CodeGenFunction::CGCapturedStmtInfo *OldCSI; @@ -228,7 +293,7 @@ private: /// captured fields. The name of the target region has to be unique in a given /// application so it is provided by the client, because only the client has /// the information to generate that. -class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPTargetRegionInfo(const CapturedStmt &CS, const RegionCodeGenTy &CodeGen, StringRef HelperName) @@ -257,7 +322,7 @@ static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { } /// \brief API for generation of expressions captured in a innermost OpenMP /// region. -class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo { +class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { public: CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, @@ -757,16 +822,36 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts) { + auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, + PrePostActionTy &) { + auto *ThreadID = getThreadID(CGF, D.getLocStart()); + auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); + llvm::Value *TaskArgs[] = { + UpLoc, ThreadID, + CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), + TaskTVar->getType()->castAs()) + .getPointer()}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); + }; + CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, + UntiedCodeGen); + CodeGen.setAction(Action); assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); auto *CS = cast(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); - CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, - InnermostKind, - cast(D).hasCancel()); + CGOpenMPTaskOutlinedRegionInfo CGInfo( + *CS, ThreadIDVar, CodeGen, InnermostKind, + cast(D).hasCancel(), Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateCapturedStmtFunction(*CS); + auto *Res = CGF.GenerateCapturedStmtFunction(*CS); + CodeGen.clearAction(); + if (!Tied) + NumberOfParts = Action.getNumberOfParts(); + return Res; } Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { @@ -1898,6 +1983,8 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); + if (auto *Region = dyn_cast_or_null(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, @@ -2951,7 +3038,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// argument. /// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { -/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, +/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, /// tt->shareds); /// return 0; /// } @@ -2982,7 +3069,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, - // tt->task_data.shareds); + // tt, tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); LValue TDBase = CGF.EmitLoadOfPointerLValue( @@ -2995,7 +3082,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto *KmpTaskTQTyRD = cast(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal(); + auto *PartidParam = PartIdLVal.getPointer(); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); @@ -3014,7 +3101,11 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, } llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, - TaskPrivatesMap, SharedsParam}; + TaskPrivatesMap, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TDBase.getAddress(), CGF.VoidPtrTy) + .getPointer(), + SharedsParam}; CGF.EmitCallOrInvoke(TaskFunction, CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), @@ -3154,8 +3245,8 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1, void CGOpenMPRuntime::emitTaskCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, bool Tied, llvm::PointerIntPair Final, - llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, ArrayRef PrivateVars, + unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, ArrayRef PrivateVars, ArrayRef PrivateCopies, ArrayRef FirstprivateVars, ArrayRef FirstprivateCopies, @@ -3390,7 +3481,8 @@ void CGOpenMPRuntime::emitTaskCall( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[] deps; - DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); + DependenciesArray = + CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); for (unsigned i = 0; i < NumDependencies; ++i) { const Expr *E = Dependences[i].second; auto Addr = CGF.EmitLValue(E); @@ -3448,8 +3540,6 @@ void CGOpenMPRuntime::emitTaskCall( // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() // libcall. - // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t - // *new_task); // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence @@ -3467,18 +3557,25 @@ void CGOpenMPRuntime::emitTaskCall( DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ThenCodeGen = [NumDependencies, &TaskArgs, + auto &&ThenCodeGen = [this, Tied, Loc, NumberOfParts, TDBase, KmpTaskTQTyRD, + NumDependencies, &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { - // TODO: add check for untied tasks. - auto &RT = CGF.CGM.getOpenMPRuntime(); + if (!Tied) { + auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); + auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); + } if (NumDependencies) { CGF.EmitRuntimeCall( - RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), - DepTaskArgs); + createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); } else { - CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_task), + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); } + // Check if parent region is untied and build return for untied task; + if (auto *Region = + dyn_cast_or_null(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); }; llvm::Value *DepWaitTaskArgs[6]; @@ -4039,6 +4136,8 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; // Ignore return result until untied tasks are supported. CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); + if (auto *Region = dyn_cast_or_null(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 0194618..3b384bec 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -82,6 +82,7 @@ public: Callback(CallbackFn::type>), PrePostAction(nullptr) {} void setAction(PrePostActionTy &Action) const { PrePostAction = &Action; } + void clearAction() const { PrePostAction = nullptr; } void operator()(CodeGenFunction &CGF) const; }; @@ -455,17 +456,25 @@ public: OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); /// \brief Emits outlined function for the OpenMP task directive \a D. This - /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32 - /// PartID, struct context_vars*). + /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t* + /// TaskT). /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param PartIDVar Variable for partition id in the current OpenMP untied + /// task region. + /// \param TaskTVar Variable for task_t argument. /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. + /// \param Tied true if task is generated for tied task, false otherwise. + /// \param NumberOfParts Number of parts in untied task. Ignored for tied + /// tasks. /// virtual llvm::Value *emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts); /// \brief Cleans up references to the objects in finished function. /// @@ -731,6 +740,7 @@ public: /// \param Tied true if the task is tied (the task is tied to the thread that /// can suspend its task region), false - untied (the task is not tied to any /// thread). + /// \param NumberOfParts Number of parts for untied task. /// \param Final Contains either constant bool value, or llvm::Value * of i1 /// type for final clause. If the value is true, the task forces all of its /// child tasks to become final and included tasks. @@ -757,8 +767,8 @@ public: virtual void emitTaskCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, bool Tied, llvm::PointerIntPair Final, - llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, ArrayRef PrivateVars, + unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, ArrayRef PrivateVars, ArrayRef PrivateCopies, ArrayRef FirstprivateVars, ArrayRef FirstprivateCopies, diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 92c05ea..3dfef3b 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2244,6 +2244,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto *I = CS->getCapturedDecl()->param_begin(); auto *PartId = std::next(I); + auto *TaskT = std::next(I, 4); // The first function argument for tasks is a thread id, the second one is a // part id (0 for tied tasks, >=0 for untied task). llvm::DenseSet EmittedAsPrivate; @@ -2288,53 +2289,52 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); } } - auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( - CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S, &PrivateVars, &FirstprivateVars]( + CodeGenFunction &CGF, PrePostActionTy &Action) { + OMPPrivateScope Scope(CGF); // Set proper addresses for generated private copies. auto *CS = cast(S.getAssociatedStmt()); - { - OMPPrivateScope Scope(CGF); - if (!PrivateVars.empty() || !FirstprivateVars.empty()) { - auto *CopyFn = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); - auto *PrivatesPtr = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); - // Map privates. - llvm::SmallVector, 16> PrivatePtrs; - llvm::SmallVector CallArgs; - CallArgs.push_back(PrivatesPtr); - for (auto *E : PrivateVars) { - auto *VD = cast(cast(E)->getDecl()); - Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); - CallArgs.push_back(PrivatePtr.getPointer()); - } - for (auto *E : FirstprivateVars) { - auto *VD = cast(cast(E)->getDecl()); - Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); - CallArgs.push_back(PrivatePtr.getPointer()); - } - CGF.EmitRuntimeCall(CopyFn, CallArgs); - for (auto &&Pair : PrivatePtrs) { - Address Replacement(CGF.Builder.CreateLoad(Pair.second), - CGF.getContext().getDeclAlign(Pair.first)); - Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); - } + if (!PrivateVars.empty() || !FirstprivateVars.empty()) { + auto *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); + auto *PrivatesPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); + // Map privates. + llvm::SmallVector, 16> PrivatePtrs; + llvm::SmallVector CallArgs; + CallArgs.push_back(PrivatesPtr); + for (auto *E : PrivateVars) { + auto *VD = cast(cast(E)->getDecl()); + Address PrivatePtr = CGF.CreateMemTemp( + CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + for (auto *E : FirstprivateVars) { + auto *VD = cast(cast(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".firstpriv.ptr.addr"); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); } - (void)Scope.Privatize(); - if (*PartId) { - // TODO: emit code for untied tasks. + CGF.EmitRuntimeCall(CopyFn, CallArgs); + for (auto &&Pair : PrivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } - CGF.EmitStmt(CS->getCapturedStmt()); } + (void)Scope.Privatize(); + + Action.Enter(CGF); + CGF.EmitStmt(CS->getCapturedStmt()); }; - auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( - S, *I, OMPD_task, CodeGen); // Check if we should emit tied or untied task. bool Tied = !S.getSingleClause(); + unsigned NumberOfParts; + auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, *PartId, *TaskT, OMPD_task, CodeGen, Tied, NumberOfParts); // Check if the task is final llvm::PointerIntPair Final; if (const auto *Clause = S.getSingleClause()) { @@ -2361,9 +2361,9 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { } OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitTaskCall( - *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, - CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, - FirstprivateCopies, FirstprivateInits, Dependences); + *this, S.getLocStart(), S, Tied, Final, NumberOfParts, OutlinedFn, + SharedsTy, CapturedStruct, IfCond, PrivateVars, PrivateCopies, + FirstprivateVars, FirstprivateCopies, FirstprivateInits, Dependences); } void CodeGenFunction::EmitOMPTaskyieldDirective( diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 3408989..0147451 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1610,12 +1610,11 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); Sema::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32Ty), - std::make_pair(".part_id.", KmpInt32Ty), - std::make_pair(".privates.", - Context.VoidPtrTy.withConst().withRestrict()), - std::make_pair( - ".copy_fn.", - Context.getPointerType(CopyFnType).withConst().withRestrict()), + std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)), + std::make_pair(".privates.", Context.VoidPtrTy.withConst()), + std::make_pair(".copy_fn.", + Context.getPointerType(CopyFnType).withConst()), + std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index b262745..3bf8253f 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -230,6 +230,19 @@ int main() { a = 4; c = 5; } +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +#pragma omp task untied + { + S s1; +#pragma omp task + a = 4; +#pragma omp taskyield + s1 = S(); +#pragma omp taskwait + } return a; } // CHECK: define internal i32 [[TASK_ENTRY1]](i32, [[KMP_TASK_T]]{{.*}}* noalias) @@ -240,16 +253,42 @@ int main() { // CHECK: store i32 10, i32* %{{.+}} // CHECK: define internal i32 [[TASK_ENTRY2]](i32, [[KMP_TASK_T]]{{.*}}* noalias) -// CHECK: store i32 1, i32* [[A_PTR:@.+]] +// CHECK: store i32 1, i32* [[A_PTR]] // CHECK: define internal i32 [[TASK_ENTRY3]](i32, [[KMP_TASK_T]]{{.*}}* noalias) -// CHECK: store i32 2, i32* [[A_PTR:@.+]] +// CHECK: store i32 2, i32* [[A_PTR]] // CHECK: define internal i32 [[TASK_ENTRY4]](i32, [[KMP_TASK_T]]{{.*}}* noalias) -// CHECK: store i32 3, i32* [[A_PTR:@.+]] +// CHECK: store i32 3, i32* [[A_PTR]] // CHECK: define internal i32 [[TASK_ENTRY5]](i32, [[KMP_TASK_T]]{{.*}}* noalias) -// CHECK: store i32 4, i32* [[A_PTR:@.+]] +// CHECK: store i32 4, i32* [[A_PTR]] // CHECK: store i32 5, i32* [[C_PTR:%.+]], align 128 + +// CHECK: define internal i32 +// CHECK: store i32 4, i32* [[A_PTR]] + +// CHECK: define internal i32 [[TASK_ENTRY6]](i32, [[KMP_TASK_T]]{{.*}}* noalias) +// CHECK: switch i32 %{{.+}}, label +// CHECK: load i32*, i32** % +// CHECK: store i32 1, i32* % +// CHECK: call i32 @__kmpc_omp_task(% + +// CHECK: call i8* @__kmpc_omp_task_alloc( +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** % +// CHECK: call i32 @__kmpc_omp_task(% +// CHECK: load i32*, i32** % +// CHECK: store i32 2, i32* % +// CHECK: call i32 @__kmpc_omp_task(% + +// CHECK: call i32 @__kmpc_omp_taskyield(% +// CHECK: load i32*, i32** % +// CHECK: store i32 3, i32* % +// CHECK: call i32 @__kmpc_omp_task(% + +// CHECK: call i32 @__kmpc_omp_taskwait(% +// CHECK: load i32*, i32** % +// CHECK: store i32 4, i32* % +// CHECK: call i32 @__kmpc_omp_task(% #endif diff --git a/clang/test/OpenMP/task_firstprivate_codegen.cpp b/clang/test/OpenMP/task_firstprivate_codegen.cpp index e224414..8abc603 100644 --- a/clang/test/OpenMP/task_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/task_firstprivate_codegen.cpp @@ -422,11 +422,11 @@ int main() { // CHECK: ret void // CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]* noalias) - -// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: alloca i32*, +// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, // CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) diff --git a/clang/test/OpenMP/task_private_codegen.cpp b/clang/test/OpenMP/task_private_codegen.cpp index 1455fd1..a2dc842 100644 --- a/clang/test/OpenMP/task_private_codegen.cpp +++ b/clang/test/OpenMP/task_private_codegen.cpp @@ -331,10 +331,11 @@ int main() { // CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]* noalias) -// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: alloca i32*, +// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, // CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]])