From f59614d906b5428f3687a44ee018df5840b301dd Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 21 Nov 2019 10:00:56 -0500 Subject: [PATCH] [OPENMP50]Add if clause in parallel for simd directive. According to OpenMP 5.0, if clause can be used in parallel for simd directive. If condition in the if clause if false, the non-vectorized version of the loop must be executed. --- clang/lib/Sema/SemaOpenMP.cpp | 7 +- clang/test/OpenMP/parallel_for_simd_codegen.cpp | 167 ++++++++++++++++++------ 2 files changed, 130 insertions(+), 44 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 59178fb..2773efc 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4538,6 +4538,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPParallelForSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_parallel); + if (LangOpts.OpenMP >= 50) + AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_parallel_sections: Res = ActOnOpenMPParallelSectionsDirective(ClausesWithImplicit, AStmt, @@ -10677,11 +10679,14 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( if (NameModifier == OMPD_unknown || NameModifier == OMPD_taskloop) CaptureRegion = OMPD_parallel; break; + case OMPD_parallel_for_simd: + if (NameModifier == OMPD_unknown || NameModifier == OMPD_simd) + CaptureRegion = OMPD_parallel; + break; case OMPD_cancel: case OMPD_parallel: case OMPD_parallel_sections: case OMPD_parallel_for: - case OMPD_parallel_for_simd: case OMPD_target: case OMPD_target_simd: case OMPD_target_teams: diff --git a/clang/test/OpenMP/parallel_for_simd_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_codegen.cpp index 9585bf2..01f2b4c 100644 --- a/clang/test/OpenMP/parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_simd_codegen.cpp @@ -1,14 +1,24 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=OMP45 --check-prefix=CHECK // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=OMP50 --check-prefix=CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=50 -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG + // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // expected-no-diagnostics +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} #ifndef HEADER #define HEADER @@ -75,7 +85,7 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_VAR:%[^,]+]] // CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]] -// CHECK: call void @__kmpc_dispatch_init_4(%struct.ident_t* {{.+}}, i32 %{{.+}}, i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK: call void @__kmpc_dispatch_init_4(%struct.ident_t* {{.+}}, i32 %{{.+}}, i32 {{35|1073741859}}, i32 0, i32 8, i32 1, i32 1) // CHECK: [[NEXT:%.+]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* {{.+}}, i32 %{{.+}}, i32* %{{.+}}, i32* [[LB:%.+]], i32* [[UB:%.+]], i32* %{{.+}}) // CHECK: [[COND:%.+]] = icmp ne i32 [[NEXT]], 0 // CHECK: br i1 [[COND]], label %[[CONT:.+]], label %[[END:.+]] @@ -386,6 +396,51 @@ void inst_templ1() { templ1 (a, z); } +// OMP50: call void @__kmpc_for_static_init_8(%struct.ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1) +// OMP50: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP50: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15 +// OMP50: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]] +// OMP50: [[TRUE]]: +// OMP50: br label %[[SWITCH:[^,]+]] +// OMP50: [[FALSE]]: +// OMP50: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP50: br label %[[SWITCH]] +// OMP50: [[SWITCH]]: +// OMP50: [[UP:%.+]] = phi i64 [ 15, %[[TRUE]] ], [ [[UB_VAL]], %[[FALSE]] ] +// OMP50: store i64 [[UP]], i64* [[UB]], +// OMP50: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// OMP50: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], + +// ... +// OMP50: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] +// OMP50-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] +// OMP50-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] +// OMP50: [[T1_BODY]]: +// Loop counters i and j updates: +// OMP50: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 +// OMP50-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 +// OMP50-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] +// OMP50-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 +// OMP50-NEXT: store i32 [[I_2]], i32* +// OMP50: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4 +// OMP50-NEXT: [[MUL_1:%.+]] = mul nsw i64 [[DIV_1]], 4 +// OMP50-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_1]] +// OMP50-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// OMP50-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] +// OMP50-NEXT: store i64 [[J_2_ADD0]], i64* +// simd.for.inc: +// OMP50: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 +// OMP50-NEXT: store i64 [[INC]], i64* +// OMP50-NEXT: br label {{%.+}} +// OMP50: [[T1_END]]: +// OMP50: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 %{{.+}}) +// OMP50: ret void +// typedef int MyIdx; @@ -674,51 +729,77 @@ void widened(float *a, float *b, float *c, float *d) { // CHECK: ret void } -// CHECK: call void @__kmpc_for_static_init_8(%struct.ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1) -// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], -// CHECK: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15 -// CHECK: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]] -// CHECK: [[TRUE]]: -// CHECK: br label %[[SWITCH:[^,]+]] -// CHECK: [[FALSE]]: -// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], -// CHECK: br label %[[SWITCH]] -// CHECK: [[SWITCH]]: -// CHECK: [[UP:%.+]] = phi i64 [ 15, %[[TRUE]] ], [ [[UB_VAL]], %[[FALSE]] ] -// CHECK: store i64 [[UP]], i64* [[UB]], -// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], -// CHECK: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], +// CHECK-LABEL: if_clause +void if_clause(int a) { + #pragma omp parallel for simd if(a) schedule(static, 1) +for (int i = 0; i < 10; ++i); +} +// CHECK: call void @__kmpc_for_static_init_4( +// OMP50: [[COND:%.+]] = trunc i8 %{{.+}} to i1 +// OMP50: br i1 [[COND]], label {{%?}}[[THEN:.+]], label {{%?}}[[ELSE:.+]] + +// OMP50: [[THEN]]: +// OMP45: br label {{.+}}, !llvm.loop ![[VECT:.+]] +// OMP50: br label {{.+}}, !llvm.loop ![[VECT:.+]] +// OMP50: [[ELSE]]: +// OMP50: br label {{.+}}, !llvm.loop ![[NOVECT:.+]] +// CHECK: call void @__kmpc_for_static_fini( + +// OMP45: call void @__kmpc_for_static_init_8(%struct.ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1) +// OMP45: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP45: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15 +// OMP45: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]] +// OMP45: [[TRUE]]: +// OMP45: br label %[[SWITCH:[^,]+]] +// OMP45: [[FALSE]]: +// OMP45: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP45: br label %[[SWITCH]] +// OMP45: [[SWITCH]]: +// OMP45: [[UP:%.+]] = phi i64 [ 15, %[[TRUE]] ], [ [[UB_VAL]], %[[FALSE]] ] +// OMP45: store i64 [[UP]], i64* [[UB]], +// OMP45: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// OMP45: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], // ... -// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] -// CHECK-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] -// CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] -// CHECK: [[T1_BODY]]: +// OMP45: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] +// OMP45-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] +// OMP45-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] +// OMP45: [[T1_BODY]]: // Loop counters i and j updates: -// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 -// CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 -// CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] -// CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 -// CHECK-NEXT: store i32 [[I_2]], i32* -// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4 -// CHECK-NEXT: [[MUL_1:%.+]] = mul nsw i64 [[DIV_1]], 4 -// CHECK-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_1]] -// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 -// CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] -// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* +// OMP45: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 +// OMP45-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 +// OMP45-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] +// OMP45-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 +// OMP45-NEXT: store i32 [[I_2]], i32* +// OMP45: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4 +// OMP45-NEXT: [[MUL_1:%.+]] = mul nsw i64 [[DIV_1]], 4 +// OMP45-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_1]] +// OMP45-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// OMP45-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] +// OMP45-NEXT: store i64 [[J_2_ADD0]], i64* // simd.for.inc: -// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 -// CHECK-NEXT: store i64 [[INC]], i64* -// CHECK-NEXT: br label {{%.+}} -// CHECK: [[T1_END]]: -// CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 %{{.+}}) -// CHECK: ret void +// OMP45: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 +// OMP45-NEXT: store i64 [[INC]], i64* +// OMP45-NEXT: br label {{%.+}} +// OMP45: [[T1_END]]: +// OMP45: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 %{{.+}}) +// OMP45: ret void // + +// OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} +// OMP45-DAG: ![[VECT]] = distinct !{![[VECT]], ![[VM:.+]]} +// OMP45-DAG: ![[VM]] = !{!"llvm.loop.vectorize.enable", i1 true} +// OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} +// OMP50-DAG: ![[VECT]] = distinct !{![[VECT]], ![[VM:.+]]} +// OMP50-DAG: ![[VM]] = !{!"llvm.loop.vectorize.enable", i1 true} +// OMP50-DAG: ![[NOVECT]] = distinct !{![[NOVECT]], ![[NOVM:.+]]} +// OMP50-DAG: ![[NOVM]] = !{!"llvm.loop.vectorize.enable", i1 false} + // TERM_DEBUG-LABEL: bar int bar() {return 0;}; -- 2.7.4