From 2adecff1aad61968c78b4ebeb0a5c2d016ea255f Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 21 Sep 2018 14:22:53 +0000 Subject: [PATCH] [OPENMP][NVPTX] Enable support for lastprivates in SPMD constructs. Previously we could not use lastprivates in SPMD constructs, patch allows supporting lastprivates in SPMD with uninitialized runtime. llvm-svn: 342738 --- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 196 +++++++++++++-------- clang/test/OpenMP/nvptx_SPMD_codegen.cpp | 4 +- .../nvptx_force_full_runtime_SPMD_codegen.cpp | 4 +- ...arget_teams_distribute_parallel_for_codegen.cpp | 29 +-- ..._teams_distribute_parallel_for_simd_codegen.cpp | 25 +-- 5 files changed, 157 insertions(+), 101 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 068ba56..0553c18a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -179,6 +179,54 @@ enum NamedBarrier : unsigned { NB_Parallel = 1, }; +typedef std::pair VarsDataTy; +static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) { + return P1.first > P2.first; +} + +static RecordDecl *buildRecordForGlobalizedVars( + ASTContext &C, ArrayRef EscapedDecls, + llvm::SmallDenseMap + &MappedDeclsFields) { + if (EscapedDecls.empty()) + return nullptr; + SmallVector GlobalizedVars; + for (const ValueDecl *D : EscapedDecls) + GlobalizedVars.emplace_back(C.getDeclAlign(D), D); + std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(), + stable_sort_comparator); + // Build struct _globalized_locals_ty { + // /* globalized vars */ + // }; + RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty"); + GlobalizedRD->startDefinition(); + for (const auto &Pair : GlobalizedVars) { + const ValueDecl *VD = Pair.second; + QualType Type = VD->getType(); + if (Type->isLValueReferenceType()) + Type = C.getPointerType(Type.getNonReferenceType()); + else + Type = Type.getNonReferenceType(); + SourceLocation Loc = VD->getLocation(); + auto *Field = + FieldDecl::Create(C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, + C.getTrivialTypeSourceInfo(Type, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + GlobalizedRD->addDecl(Field); + if (VD->hasAttrs()) { + for (specific_attr_iterator I(VD->getAttrs().begin()), + E(VD->getAttrs().end()); + I != E; ++I) + Field->addAttr(*I); + } + MappedDeclsFields.try_emplace(VD, Field); + } + GlobalizedRD->completeDefinition(); + return GlobalizedRD; +} + /// Get the list of variables that can escape their declaration context. class CheckVarsEscapingDeclContext final : public ConstStmtVisitor { @@ -292,51 +340,11 @@ class CheckVarsEscapingDeclContext final } } - typedef std::pair VarsDataTy; - static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) { - return P1.first > P2.first; - } - void buildRecordForGlobalizedVars() { assert(!GlobalizedRD && "Record for globalized variables is built already."); - if (EscapedDecls.empty()) - return; - ASTContext &C = CGF.getContext(); - SmallVector GlobalizedVars; - for (const ValueDecl *D : EscapedDecls) - GlobalizedVars.emplace_back(C.getDeclAlign(D), D); - std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(), - stable_sort_comparator); - // Build struct _globalized_locals_ty { - // /* globalized vars */ - // }; - GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty"); - GlobalizedRD->startDefinition(); - for (const auto &Pair : GlobalizedVars) { - const ValueDecl *VD = Pair.second; - QualType Type = VD->getType(); - if (Type->isLValueReferenceType()) - Type = C.getPointerType(Type.getNonReferenceType()); - else - Type = Type.getNonReferenceType(); - SourceLocation Loc = VD->getLocation(); - auto *Field = FieldDecl::Create( - C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, - C.getTrivialTypeSourceInfo(Type, SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, - /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - GlobalizedRD->addDecl(Field); - if (VD->hasAttrs()) { - for (specific_attr_iterator I(VD->getAttrs().begin()), - E(VD->getAttrs().end()); - I != E; ++I) - Field->addAttr(*I); - } - MappedDeclsFields.try_emplace(VD, Field); - } - GlobalizedRD->completeDefinition(); + GlobalizedRD = ::buildRecordForGlobalizedVars( + CGF.getContext(), EscapedDecls.getArrayRef(), MappedDeclsFields); } public: @@ -672,13 +680,6 @@ static bool hasParallelIfNumThreadsClause(ASTContext &Ctx, return false; } -/// Checks if the directive is the distribute clause with the lastprivate -/// clauses. This construct does not support SPMD execution mode. -static bool hasDistributeWithLastprivateClauses(const OMPExecutableDirective &D) { - return isOpenMPDistributeDirective(D.getDirectiveKind()) && - D.hasClausesOfKind(); -} - /// Check for inner (nested) SPMD construct, if any static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { @@ -692,8 +693,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, switch (D.getDirectiveKind()) { case OMPD_target: if (isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NestedDir) && - !hasDistributeWithLastprivateClauses(*NestedDir)) + !hasParallelIfNumThreadsClause(Ctx, *NestedDir)) return true; if (DKind == OMPD_teams) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( @@ -704,16 +704,14 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, if (const auto *NND = dyn_cast(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NND) && - !hasDistributeWithLastprivateClauses(*NND)) + !hasParallelIfNumThreadsClause(Ctx, *NND)) return true; } } return false; case OMPD_target_teams: return isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NestedDir) && - !hasDistributeWithLastprivateClauses(*NestedDir); + !hasParallelIfNumThreadsClause(Ctx, *NestedDir); case OMPD_target_simd: case OMPD_target_parallel: case OMPD_target_parallel_for: @@ -786,8 +784,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: // Distribute with lastprivates requires non-SPMD execution mode. - return !hasParallelIfNumThreadsClause(Ctx, D) && - !hasDistributeWithLastprivateClauses(D); + return !hasParallelIfNumThreadsClause(Ctx, D); case OMPD_target_simd: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: @@ -1799,28 +1796,88 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( return OutlinedFun; } +/// Get list of lastprivate variables from the teams distribute ... or +/// teams {distribute ...} directives. +static void +getDistributeLastprivateVars(const OMPExecutableDirective &D, + llvm::SmallVectorImpl &Vars) { + assert(isOpenMPTeamsDirective(D.getDirectiveKind()) && + "expected teams directive."); + const OMPExecutableDirective *Dir = &D; + if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { + if (const Stmt *S = getSingleCompoundChild( + D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers( + /*IgnoreCaptured=*/true))) { + Dir = dyn_cast(S); + if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) + Dir = nullptr; + } + } + if (!Dir) + return; + for (const OMPLastprivateClause *C : + Dir->getClausesOfKind()) { + for (const Expr *E : C->getVarRefs()) { + const auto *DE = cast(E->IgnoreParens()); + Vars.push_back(cast(DE->getDecl()->getCanonicalDecl())); + } + } +} + llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { SourceLocation Loc = D.getBeginLoc(); + const RecordDecl *GlobalizedRD = nullptr; + llvm::SmallVector LastPrivates; + llvm::SmallDenseMap MappedDeclsFields; + if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { + getDistributeLastprivateVars(D, LastPrivates); + if (!LastPrivates.empty()) + GlobalizedRD = buildRecordForGlobalizedVars( + CGM.getContext(), LastPrivates, MappedDeclsFields); + } + // Emit target region as a standalone region. class NVPTXPrePostActionTy : public PrePostActionTy { SourceLocation &Loc; + const RecordDecl *GlobalizedRD; + llvm::SmallDenseMap + &MappedDeclsFields; public: - NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {} + NVPTXPrePostActionTy( + SourceLocation &Loc, const RecordDecl *GlobalizedRD, + llvm::SmallDenseMap + &MappedDeclsFields) + : Loc(Loc), GlobalizedRD(GlobalizedRD), + MappedDeclsFields(MappedDeclsFields) {} void Enter(CodeGenFunction &CGF) override { - static_cast(CGF.CGM.getOpenMPRuntime()) - .emitGenericVarsProlog(CGF, Loc); + auto &Rt = + static_cast(CGF.CGM.getOpenMPRuntime()); + if (GlobalizedRD) { + auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; + I->getSecond().GlobalRecord = GlobalizedRD; + I->getSecond().MappedParams = + llvm::make_unique(); + DeclToAddrMapTy &Data = I->getSecond().LocalVarData; + for (const auto &Pair : MappedDeclsFields) { + assert(Pair.getFirst()->isCanonicalDecl() && + "Expected canonical declaration"); + Data.insert(std::make_pair( + Pair.getFirst(), + std::make_pair(Pair.getSecond(), Address::invalid()))); + } + } + Rt.emitGenericVarsProlog(CGF, Loc); } void Exit(CodeGenFunction &CGF) override { static_cast(CGF.CGM.getOpenMPRuntime()) .emitGenericVarsEpilog(CGF); } - } Action(Loc); - if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD) - CodeGen.setAction(Action); + } Action(Loc, GlobalizedRD, MappedDeclsFields); + CodeGen.setAction(Action); llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); llvm::Function *OutlinedFun = cast(OutlinedFunVal); @@ -1834,7 +1891,8 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc, bool WithSPMDCheck) { - if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic && + getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD) return; CGBuilderTy &Bld = CGF.Builder; @@ -1892,8 +1950,6 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, I->getSecond().GlobalRecordAddr = Phi; I->getSecond().IsInSPMDModeFlag = IsSPMD; } else { - assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD && - "Expected Non-SPMD construct."); // TODO: allow the usage of shared memory to be controlled by // the user, for now, default to global. llvm::Value *GlobalRecordSizeArg[] = { @@ -1967,7 +2023,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck) { - if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic && + getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD) return; const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); @@ -1997,8 +2054,6 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF, CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); CGF.EmitBlock(ExitBB); } else { - assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD && - "Expected Non-SPMD mode."); CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), I->getSecond().GlobalRecordAddr); @@ -3950,6 +4005,9 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, } else if (const auto *CD = dyn_cast(D)) { Body = CD->getBody(); NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP; + if (NeedToDelayGlobalization && + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) + return; } if (!Body) return; diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp index 615dc30..c247cc3 100644 --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -9,8 +9,6 @@ #define HEADER // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 -// CHECK: @__omp_offloading_{{.+}}_l52_exec_mode = weak constant i8 1 -// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) @@ -42,7 +40,7 @@ void foo() { for (int i = 0; i < 10; ++i) ; int a; -// CHECK: call void @__kmpc_kernel_init( +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) diff --git a/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp index 288410a..395a2d4 100644 --- a/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp @@ -9,8 +9,6 @@ #define HEADER // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 -// CHECK: @__omp_offloading_{{.+}}_l52_exec_mode = weak constant i8 1 -// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) @@ -42,7 +40,7 @@ void foo() { for (int i = 0; i < 10; ++i) ; int a; -// CHECK: call void @__kmpc_kernel_init( +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index bbcb19d..2fdcbe6 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -8,13 +8,12 @@ #ifndef HEADER #define HEADER -// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l33}}_exec_mode = weak constant i8 1 -// Check that the execution mode of all 4 target regions on the gpu is set to SPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l39}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l49}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l57}}_exec_mode = weak constant i8 0 +// Check that the execution mode of all 5 target regions on the gpu is set to SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l56}}_exec_mode = weak constant i8 0 #define N 1000 #define M 10 @@ -68,14 +67,16 @@ int bar(int n){ return a; } -// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l33_worker() - -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l33( -// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: [[TEAM_ALLOC:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0) +// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]* +// CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1) +// CHECK: {{call|invoke}} void [[OUTL1:@.+]]( // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_kernel_deinit(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: ret void // CHECK: define internal void [[OUTL1]]( @@ -127,7 +128,7 @@ int bar(int n){ // CHECK: call void @__kmpc_for_static_fini( // CHECK: ret void -// CHECK: define weak void @__omp_offloading_{{.*}}_l57(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}}) +// CHECK: define weak void @__omp_offloading_{{.*}}_l56(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}}) // CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}}) // CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}}) diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index 86768b0..0f0f12c 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -8,12 +8,11 @@ #ifndef HEADER #define HEADER -// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 1 -// Check that the execution mode of all 3 target regions on the gpu is set to SPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l42}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l47}}_exec_mode = weak constant i8 0 +// Check that the execution mode of all 4 target regions on the gpu is set to SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0 #define N 1000 #define M 10 @@ -63,14 +62,16 @@ int bar(int n){ return a; } -// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l31_worker() - -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l31( -// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: [[TEAM_ALLOC:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0) +// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]* +// CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1) +// CHECK: {{call|invoke}} void [[OUTL1:@.+]]( // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_kernel_deinit(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: ret void // CHECK: define internal void [[OUTL1]]( -- 2.7.4