From df093e7b45b345493b545b5639c5b606689764e7 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 11 May 2018 19:45:14 +0000 Subject: [PATCH] [OPENMP, NVPTX] Do not use SPMD mode for target simd and target teams distribute simd directives. Directives `target simd` and `target teams distribute simd` must be executed in non-SPMD mode. llvm-svn: 332129 --- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 32 ++++++++---------- clang/test/OpenMP/nvptx_target_simd_codegen.cpp | 30 ++++++++--------- .../nvptx_target_teams_distribute_simd_codegen.cpp | 38 ++++++++++------------ 3 files changed, 43 insertions(+), 57 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 98d8b0f..bad4a8b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -628,9 +628,8 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: - if ((isOpenMPParallelDirective(DKind) && - !hasParallelIfClause(Ctx, *NestedDir)) || - isOpenMPSimdDirective(DKind)) + if (isOpenMPParallelDirective(DKind) && + !hasParallelIfClause(Ctx, *NestedDir)) return true; if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(); @@ -639,9 +638,8 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, ChildStmt = getSingleCompoundChild(Body); if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); - if ((isOpenMPParallelDirective(DKind) && - !hasParallelIfClause(Ctx, *NND)) || - isOpenMPSimdDirective(DKind)) + if (isOpenMPParallelDirective(DKind) && + !hasParallelIfClause(Ctx, *NND)) return true; if (DKind == OMPD_distribute) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(); @@ -652,18 +650,16 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, return false; if (const auto *NND = 
dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); - return (isOpenMPParallelDirective(DKind) && - !hasParallelIfClause(Ctx, *NND)) || - isOpenMPSimdDirective(DKind); + return isOpenMPParallelDirective(DKind) && + !hasParallelIfClause(Ctx, *NND); } } } } return false; case OMPD_target_teams: - if ((isOpenMPParallelDirective(DKind) && - !hasParallelIfClause(Ctx, *NestedDir)) || - isOpenMPSimdDirective(DKind)) + if (isOpenMPParallelDirective(DKind) && + !hasParallelIfClause(Ctx, *NestedDir)) return true; if (DKind == OMPD_distribute) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(); @@ -672,16 +668,14 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, ChildStmt = getSingleCompoundChild(Body); if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); - return (isOpenMPParallelDirective(DKind) && - !hasParallelIfClause(Ctx, *NND)) || - isOpenMPSimdDirective(DKind); + return isOpenMPParallelDirective(DKind) && + !hasParallelIfClause(Ctx, *NND); } } return false; case OMPD_target_teams_distribute: - return (isOpenMPParallelDirective(DKind) && - !hasParallelIfClause(Ctx, *NestedDir)) || - isOpenMPSimdDirective(DKind); + return isOpenMPParallelDirective(DKind) && + !hasParallelIfClause(Ctx, *NestedDir); case OMPD_target_simd: case OMPD_target_parallel: case OMPD_target_parallel_for: @@ -755,7 +749,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, return !hasParallelIfClause(Ctx, D); case OMPD_target_simd: case OMPD_target_teams_distribute_simd: - return true; + return false; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp index 001eb68..5f307d8 100644 --- a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp @@ -8,11 +8,11 @@ #ifndef HEADER #define HEADER -// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. 
-// CHECK-DAG: {{@__omp_offloading_.+l25}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l35}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l40}}_exec_mode = weak constant i8 0 +// Check that the execution mode of all 2 target regions on the gpu is set to NonSPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l25}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l35}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l40}}_exec_mode = weak constant i8 1 #define N 1000 @@ -54,38 +54,34 @@ int bar(int n){ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l25}}( -// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l30}}( -// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l35}}( -// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) // 
CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l40}}( -// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini // CHECK: [[RES:%.+]] = call i32 @__kmpc_nvptx_simd_reduce_nowait(i32 %{{.+}}, i32 1, i{{64|32}} {{8|4}}, i8* %{{.+}}, void (i8*, i16, i16, i16)* @{{.+}}, void (i8*, i32)* @{{.+}}) // CHECK: switch i32 [[RES]] // CHECK: call void @__kmpc_nvptx_end_reduce_nowait(i32 %{{.+}}) -// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp index a78a01a..48f3147 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -8,11 +8,11 @@ #ifndef HEADER #define HEADER -// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0 +// Check that the execution mode of all 2 target regions on the gpu is set to NonSPMD Mode. 
+// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 1 #define N 1000 #define M 10 @@ -62,38 +62,34 @@ int bar(int n){ return a; } -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( -// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30( +// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( -// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l36( +// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( -// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l41( +// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void 
@__kmpc_spmd_kernel_deinit() +// CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void -// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]]) +// CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l46({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, -// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void #endif -- 2.7.4