From dc9e7dcbb0a5c4f9d39bd9dead03385856db77cf Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 17 Apr 2019 16:53:08 +0000 Subject: [PATCH] [OPENMP][NVPTX]Run combined constructs with if clause in SPMD mode. All target-parallel-based constructs can be run in SPMD mode from now on. Even if num_threads clauses or if clauses are used, such constructs can be executed in SPMD mode. llvm-svn: 358595 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 110 +++++++++++++++++---- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 31 +----- ...tx_distribute_parallel_generic_mode_codegen.cpp | 19 ++-- clang/test/OpenMP/nvptx_target_simd_codegen.cpp | 24 ++--- .../nvptx_target_teams_distribute_simd_codegen.cpp | 34 +++---- clang/test/OpenMP/target_parallel_if_codegen.cpp | 11 ++- .../OpenMP/target_parallel_num_threads_codegen.cpp | 2 +- 7 files changed, 140 insertions(+), 91 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index da4e355..1e1251e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6657,6 +6657,47 @@ static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, CGF.getContext(), CS->getCapturedStmt()); if (const auto *Dir = dyn_cast_or_null(Child)) { if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { + llvm::Value *NumThreads = nullptr; + llvm::Value *CondVal = nullptr; + // Handle if clause. If if clause present, the number of threads is + // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
+ if (Dir->hasClausesOfKind()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const OMPIfClause *IfClause = nullptr; + for (const auto *C : Dir->getClausesOfKind()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_parallel) { + IfClause = C; + break; + } + } + if (IfClause) { + const Expr *Cond = IfClause->getCondition(); + bool Result; + if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { + if (!Result) + return CGF.Builder.getInt32(1); + } else { + CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); + if (const auto *PreInit = + cast_or_null(IfClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr()) { + CGF.EmitVarDecl(cast(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + CondVal = CGF.EvaluateExprAsBool(Cond); + } + } + } + // Check the value of num_threads clause iff if clause was not specified + // or is not evaluated to false. if (Dir->hasClausesOfKind()) { CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -6676,19 +6717,23 @@ static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, } } } - llvm::Value *NumThreads = - CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); + NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, - /*IsSigned=*/true); - return DefaultThreadLimitVal - ? 
CGF.Builder.CreateSelect( - CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, - NumThreads), - DefaultThreadLimitVal, NumThreads) - : NumThreads; + /*IsSigned=*/false); + if (DefaultThreadLimitVal) + NumThreads = CGF.Builder.CreateSelect( + CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), + DefaultThreadLimitVal, NumThreads); + } else { + NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal + : CGF.Builder.getInt32(0); } - return DefaultThreadLimitVal ? DefaultThreadLimitVal - : CGF.Builder.getInt32(0); + // Process condition of the if clause. + if (CondVal) { + NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, + CGF.Builder.getInt32(1)); + } + return NumThreads; } if (isOpenMPSimdDirective(Dir->getDirectiveKind())) return CGF.Builder.getInt32(1); @@ -6748,7 +6793,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, llvm::Value *ThreadLimit = CGF.EmitScalarExpr( ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); } if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { @@ -6775,7 +6820,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, llvm::Value *ThreadLimit = CGF.EmitScalarExpr( ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); } const CapturedStmt *CS = D.getInnermostCapturedStmt(); if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) @@ -6798,21 +6843,45 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, llvm::Value *ThreadLimit = CGF.EmitScalarExpr( ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); + 
Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); } return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for: - case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_target_teams_distribute_parallel_for_simd: { + llvm::Value *CondVal = nullptr; + // Handle if clause. If if clause present, the number of threads is + // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. + if (D.hasClausesOfKind()) { + const OMPIfClause *IfClause = nullptr; + for (const auto *C : D.getClausesOfKind()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_parallel) { + IfClause = C; + break; + } + } + if (IfClause) { + const Expr *Cond = IfClause->getCondition(); + bool Result; + if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { + if (!Result) + return Bld.getInt32(1); + } else { + CodeGenFunction::RunCleanupsScope Scope(CGF); + CondVal = CGF.EvaluateExprAsBool(Cond); + } + } + } if (D.hasClausesOfKind()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); const auto *ThreadLimitClause = D.getSingleClause(); llvm::Value *ThreadLimit = CGF.EmitScalarExpr( ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); } if (D.hasClausesOfKind()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); @@ -6820,14 +6889,19 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, llvm::Value *NumThreads = CGF.EmitScalarExpr( NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); NumThreadsVal = - Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); + Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false); ThreadLimitVal = ThreadLimitVal ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, ThreadLimitVal), NumThreadsVal, ThreadLimitVal) : NumThreadsVal; } - return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); + if (!ThreadLimitVal) + ThreadLimitVal = Bld.getInt32(0); + if (CondVal) + return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); + return ThreadLimitVal; + } case OMPD_target_teams_distribute_simd: case OMPD_target_simd: return Bld.getInt32(1); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 0085b01..16f6c12 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -714,24 +714,6 @@ getDataSharingMode(CodeGenModule &CGM) { : CGOpenMPRuntimeNVPTX::Generic; } -/// Check if the parallel directive has an 'if' clause with non-constant or -/// false condition. -static bool hasParallelIfClause(ASTContext &Ctx, - const OMPExecutableDirective &D, - bool StandaloneParallel) { - for (const auto *C : D.getClausesOfKind()) { - OpenMPDirectiveKind NameModifier = C->getNameModifier(); - if (NameModifier != OMPD_parallel && - (!StandaloneParallel || NameModifier != OMPD_unknown)) - continue; - const Expr *Cond = C->getCondition(); - bool Result; - if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result) - return true; - } - return false; -} - /// Check for inner (nested) SPMD construct, if any static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { @@ -745,8 +727,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: - if (isOpenMPParallelDirective(DKind) && - !hasParallelIfClause(Ctx, *NestedDir, /*StandaloneParallel=*/true)) + if (isOpenMPParallelDirective(DKind)) return true; if (DKind == OMPD_teams) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( @@ -757,15 +738,13 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, 
if (const auto *NND = dyn_cast_or_null(ChildStmt)) { DKind = NND->getDirectiveKind(); - if (isOpenMPParallelDirective(DKind) && - !hasParallelIfClause(Ctx, *NND, /*StandaloneParallel=*/true)) + if (isOpenMPParallelDirective(DKind)) return true; } } return false; case OMPD_target_teams: - return isOpenMPParallelDirective(DKind) && - !hasParallelIfClause(Ctx, *NestedDir, /*StandaloneParallel=*/true); + return isOpenMPParallelDirective(DKind); case OMPD_target_simd: case OMPD_target_parallel: case OMPD_target_parallel_for: @@ -839,10 +818,10 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: - return !hasParallelIfClause(Ctx, D, /*StandaloneParallel=*/false); case OMPD_target_simd: - case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + return true; + case OMPD_target_teams_distribute: return false; case OMPD_parallel: case OMPD_for: diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp index d9056eef..9470aa7 100644 --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -24,29 +24,22 @@ int main(int argc, char **argv) { // CHECK: [[MEM_TY:%.+]] = type { [128 x i8] } // CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null -// CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 84 -// CHECK-DAG: @__omp_offloading_{{.*}}_main_l17_exec_mode = weak constant i8 1 - -// CHECK-LABEL: define internal void @__omp_offloading_{{.*}}_main_l17_worker( +// CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 40 +// CHECK-DAG: @__omp_offloading_{{.*}}_main_l17_exec_mode = 
weak constant i8 0 // CHECK: define weak void @__omp_offloading_{{.*}}_main_l17([10 x i32]* dereferenceable(40) %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i{{64|32}} %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}}) -// CHECK: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 84, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CHECK: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 40, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CHECK: [[PTR:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CHECK: [[STACK:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty* -// CHECK: [[ARGC:%.+]] = load i32, i32* %{{.+}}, align -// CHECK: [[ARGC_ADDR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: store i32 [[ARGC]], i32* [[ARGC_ADDR]], -// CHECK: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 2 +// CHECK: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK-NOT: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], // CHECK: call void @__kmpc_for_static_init_4( -// CHECK: call void @__kmpc_serialized_parallel( // CHECK: call void [[PARALLEL:@.+]]( -// CHECK: call void @__kmpc_end_serialized_parallel( // CHECK: call void 
@__kmpc_for_static_fini(%struct.ident_t* @ -// CHECK: call void @__kmpc_restore_team_static_memory(i16 0, i16 1) +// CHECK: call void @__kmpc_restore_team_static_memory(i16 1, i16 1) // CHECK: define internal void [[PARALLEL]]( // CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack( diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp index 89ea173..ce4a423 100644 --- a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp @@ -9,10 +9,10 @@ #define HEADER // Check that the execution mode of all 2 target regions on the gpu is set to NonSPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l25}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l35}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l40}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l25}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l35}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l40}}_exec_mode = weak constant i8 0 #define N 1000 @@ -54,33 +54,33 @@ int bar(int n){ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l25}}( -// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_kernel_deinit(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l30}}( -// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void 
@__kmpc_for_static_fini -// CHECK: call void @__kmpc_kernel_deinit(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l35}}( -// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_kernel_deinit(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l40}}( -// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini // CHECK-NOT: call i32 @__kmpc_nvptx_simd_reduce_nowait( // CHECK-NOT: call void @__kmpc_nvptx_end_reduce_nowait( -// CHECK: call void @__kmpc_kernel_deinit(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) // CHECK: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp index 48f3147..f86b456 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -9,10 +9,10 @@ #define HEADER // Check that the execution mode of all 2 target regions on the gpu is set to NonSPMD Mode. 
-// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0 #define N 1000 #define M 10 @@ -22,7 +22,7 @@ tx ftemplate(int n) { tx a[N]; short aa[N]; tx b[10]; - tx c[M][M]; + tx c[M][M]; tx f = n; tx l; int k; @@ -47,7 +47,7 @@ tx ftemplate(int n) { for(int i = 0; i < M; i++) { for(int j = 0; j < M; j++) { k = M; - c[i][j] = i+j*f+k; + c[i][j] = i + j * f + k; } } @@ -63,33 +63,33 @@ int bar(int n){ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30( -// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_kernel_deinit(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l36( -// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_kernel_deinit(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l41( -// CHECK: call void 
@__kmpc_kernel_init(i32 %{{.+}}, i16 1) -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_kernel_deinit(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) // CHECK: ret void // CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l46({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, -// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 1, i16 0) // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_kernel_deinit(i16 1) +// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) // CHECK: ret void #endif diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp index 03e8c34..a4fdaa6 100644 --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -163,7 +163,8 @@ int bar(int n){ // CHECK: store i8 [[FB]], i8* [[CONV]], align // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, {{.*}}, i32 1, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, {{.*}}, i32 1, i32 [[NT:%.+]]) +// CHECK-DAG: [[NT]] = select i1 %{{.+}}, i32 0, i32 1 // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // @@ -189,7 +190,8 @@ 
int bar(int n){ // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[NT:%.+]]) +// CHECK-DAG: [[NT]] = select i1 %{{.+}}, i32 0, i32 1 // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] @@ -221,7 +223,8 @@ int bar(int n){ // CHECK: br i1 [[TB]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[NT:%.+]]) +// CHECK-DAG: [[NT]] = select i1 %{{.+}}, i32 0, i32 1 // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] @@ -263,7 +266,7 @@ int bar(int n){ // // CHECK: define {{.*}}[[FTEMPLATE]] // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 1) // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // diff --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp index 7a39a3f..41a779c 100644 --- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -263,7 +263,7 @@ int bar(int n){ // CHECK: store i16 [[CEV]], i16* [[CONV]], align // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // 
CHECK: [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align -// CHECK: [[THREADS:%.+]] = sext i16 [[T]] to i32 +// CHECK: [[THREADS:%.+]] = zext i16 [[T]] to i32 // // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]]) // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 -- 2.7.4