From a781521867e9952e8d5856e10bf900b37f8ec4e8 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 3 Feb 2020 12:08:16 -0500 Subject: [PATCH] [OPENMP50]Codegen support for order(concurrent) clause. Emit llvm parallel access metadata for the loops if they are marked as order(concurrent). --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 17 +++++++-- clang/test/OpenMP/for_codegen.cpp | 18 +++++---- ...teams_distribute_parallel_for_order_codegen.cpp | 44 ++++++++++++++++++++++ 3 files changed, 69 insertions(+), 10 deletions(-) create mode 100644 clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index a84f2fe..827ea213 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1931,6 +1931,9 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, LoopStack.setParallel(!IsMonotonic); LoopStack.setVectorizeEnable(); emitSimdlenSafelenClause(*this, D, IsMonotonic); + if (const auto *C = D.getSingleClause()) + if (C->getKind() == OMPC_ORDER_concurrent) + LoopStack.setParallel(/*Enable=*/true); } void CodeGenFunction::EmitOMPSimdFinal( @@ -2202,10 +2205,14 @@ void CodeGenFunction::EmitOMPOuterLoop( [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { // Generate !llvm.loop.parallel metadata for loads and stores for loops // with dynamic/guided scheduling and without ordered clause. - if (!isOpenMPSimdDirective(S.getDirectiveKind())) + if (!isOpenMPSimdDirective(S.getDirectiveKind())) { CGF.LoopStack.setParallel(!IsMonotonic); - else + if (const auto *C = S.getSingleClause()) + if (C->getKind() == OMPC_ORDER_concurrent) + CGF.LoopStack.setParallel(/*Enable=*/true); + } else { CGF.EmitOMPSimdInit(S, IsMonotonic); + } }, [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2720,8 +2727,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( emitCommonSimdLoop( *this, S, [&S](CodeGenFunction &CGF, PrePostActionTy &) { - if (isOpenMPSimdDirective(S.getDirectiveKind())) + if (isOpenMPSimdDirective(S.getDirectiveKind())) { CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); + } else if (const auto *C = S.getSingleClause()) { + if (C->getKind() == OMPC_ORDER_concurrent) + CGF.LoopStack.setParallel(/*Enable=*/true); + } }, [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, &S, ScheduleKind, LoopExit, diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp index a837a4a..9082eaa 100644 --- a/clang/test/OpenMP/for_codegen.cpp +++ b/clang/test/OpenMP/for_codegen.cpp @@ -743,28 +743,28 @@ void body_f(); // OMP5-LABEL: imperfectly_nested_loop void imperfectly_nested_loop() { // OMP5: call void @__kmpc_for_static_init_4( -#pragma omp for collapse(3) +#pragma omp for collapse(3) order(concurrent) for (int i = 0; i < 10; ++i) { { int a, d; // OMP5: invoke void @{{.+}}first{{.+}}() first(); - // OMP5: load i32 - // OMP5: store i32 + // OMP5: load i32{{.*}}!llvm.access.group ![[AG:[0-9]+]] + // OMP5: store i32{{.*}}!llvm.access.group ![[AG]] a = d; for (int j = 0; j < 10; ++j) { int a, d; // OMP5: invoke void @{{.+}}inner_f{{.+}}() inner_f(); - // OMP5: load i32 - // OMP5: store i32 + // OMP5: load i32{{.*}}!llvm.access.group ![[AG]] + // OMP5: store i32{{.*}}!llvm.access.group ![[AG]] a = d; for (int k = 0; k < 10; ++k) { int a, d; // OMP5: invoke void @{{.+}}body_f{{.+}}() body_f(); - // OMP5: load i32 - // OMP5: store i32 + // OMP5: load i32{{.*}}!llvm.access.group ![[AG]] + // OMP5: store i32{{.*}}!llvm.access.group ![[AG]] a = d; } // OMP5: invoke void @{{.+}}inner_l{{.+}}() @@ -776,6 +776,10 @@ void imperfectly_nested_loop() { } // OMP5: call void @__kmpc_for_static_fini( } + +// OMP5: ![[AG]] = distinct !{} +// OMP5: !{!"llvm.loop.parallel_accesses", ![[AG]]} + #endif #endif // HEADER diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp new file mode 100644 index 0000000..201c19e --- /dev/null +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY +// SIMD-ONLY-NOT: {{__kmpc|__tgt}} +// REQUIRES: powerpc-registered-target + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test +void gtid_test() { +// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100) +// CHECK: %0 = call i32 @__tgt_target_teams(i64 -1, i8* @{{.+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 0, i32 0) +// CHECK: call void [[TARGET_OUTLINE:@.+]]() +// CHECK: ret void +#pragma omp target teams distribute parallel for order(concurrent) + for(int i = 0 ; i < 100; i++) {} +} + +// CHECK: define internal void [[TARGET_OUTLINE]]() +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @{{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[TEAMS_OUTLINE:@.+]] to void (i32*, i32*, ...)*)) +// CHECK: ret void + +// CHECK: define internal void [[TEAMS_OUTLINE]](i32* {{.+}}, i32* {{.+}}) +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-NOT: {{store|load}}{{.+}}!llvm.access.group ! +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* [[PARALLEL_OUTLINE:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}) +// CHECK-NOT: {{store|load}}{{.+}}!llvm.access.group ! +// CHECK: call void @__kmpc_for_static_fini( + +// CHECK: define internal void [[PARALLEL_OUTLINE]](i32* {{.+}}, i32* {{.+}}, i64 {{.+}}, i64 {{.+}}) +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: {{store|load}}{{.+}}!llvm.access.group ![[AG:[0-9]+]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: ![[AG]] = distinct !{} +// CHECK: !{!"llvm.loop.parallel_accesses", ![[AG]]} +#endif -- 2.7.4