From a781521867e9952e8d5856e10bf900b37f8ec4e8 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Mon, 3 Feb 2020 12:08:16 -0500
Subject: [PATCH] [OPENMP50]Codegen support for order(concurrent) clause.

Emit llvm parallel access metadata for the loops if they are marked as
order(concurrent).
---
 clang/lib/CodeGen/CGStmtOpenMP.cpp                 | 17 +++++++--
 clang/test/OpenMP/for_codegen.cpp                  | 18 +++++----
 ...teams_distribute_parallel_for_order_codegen.cpp | 44 ++++++++++++++++++++++
 3 files changed, 69 insertions(+), 10 deletions(-)
 create mode 100644 clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index a84f2fe..827ea213 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1931,6 +1931,9 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
   LoopStack.setParallel(!IsMonotonic);
   LoopStack.setVectorizeEnable();
   emitSimdlenSafelenClause(*this, D, IsMonotonic);
+  if (const auto *C = D.getSingleClause<OMPOrderClause>())
+    if (C->getKind() == OMPC_ORDER_concurrent)
+      LoopStack.setParallel(/*Enable=*/true);
 }
 
 void CodeGenFunction::EmitOMPSimdFinal(
@@ -2202,10 +2205,14 @@ void CodeGenFunction::EmitOMPOuterLoop(
       [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
         // Generate !llvm.loop.parallel metadata for loads and stores for loops
         // with dynamic/guided scheduling and without ordered clause.
-        if (!isOpenMPSimdDirective(S.getDirectiveKind()))
+        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
           CGF.LoopStack.setParallel(!IsMonotonic);
-        else
+          if (const auto *C = S.getSingleClause<OMPOrderClause>())
+            if (C->getKind() == OMPC_ORDER_concurrent)
+              CGF.LoopStack.setParallel(/*Enable=*/true);
+        } else {
           CGF.EmitOMPSimdInit(S, IsMonotonic);
+        }
       },
       [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
        &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
@@ -2720,8 +2727,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
         emitCommonSimdLoop(
             *this, S,
             [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-              if (isOpenMPSimdDirective(S.getDirectiveKind()))
+              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
+              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
+                if (C->getKind() == OMPC_ORDER_concurrent)
+                  CGF.LoopStack.setParallel(/*Enable=*/true);
+              }
             },
             [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
              &S, ScheduleKind, LoopExit,
diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp
index a837a4a..9082eaa 100644
--- a/clang/test/OpenMP/for_codegen.cpp
+++ b/clang/test/OpenMP/for_codegen.cpp
@@ -743,28 +743,28 @@ void body_f();
 // OMP5-LABEL: imperfectly_nested_loop
 void imperfectly_nested_loop() {
   // OMP5: call void @__kmpc_for_static_init_4(
-#pragma omp for collapse(3)
+#pragma omp for collapse(3) order(concurrent)
   for (int i = 0; i < 10; ++i) {
     {
       int a, d;
       // OMP5: invoke void @{{.+}}first{{.+}}()
       first();
-      // OMP5: load i32
-      // OMP5: store i32
+      // OMP5: load i32{{.*}}!llvm.access.group ![[AG:[0-9]+]]
+      // OMP5: store i32{{.*}}!llvm.access.group ![[AG]]
       a = d;
       for (int j = 0; j < 10; ++j) {
         int a, d;
         // OMP5: invoke void @{{.+}}inner_f{{.+}}()
         inner_f();
-        // OMP5: load i32
-        // OMP5: store i32
+        // OMP5: load i32{{.*}}!llvm.access.group ![[AG]]
+        // OMP5: store i32{{.*}}!llvm.access.group ![[AG]]
         a = d;
         for (int k = 0; k < 10; ++k) {
           int a, d;
           // OMP5: invoke void @{{.+}}body_f{{.+}}()
           body_f();
-          // OMP5: load i32
-          // OMP5: store i32
+          // OMP5: load i32{{.*}}!llvm.access.group ![[AG]]
+          // OMP5: store i32{{.*}}!llvm.access.group ![[AG]]
           a = d;
         }
         // OMP5: invoke void @{{.+}}inner_l{{.+}}()
@@ -776,6 +776,10 @@ void imperfectly_nested_loop() {
   }
   // OMP5: call void @__kmpc_for_static_fini(
 }
+
+// OMP5: ![[AG]] = distinct !{}
+// OMP5: !{!"llvm.loop.parallel_accesses", ![[AG]]}
+
 #endif
 
 #endif // HEADER
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
new file mode 100644
index 0000000..201c19e
--- /dev/null
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+// REQUIRES: powerpc-registered-target
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
+void gtid_test() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
+// CHECK: %0 = call i32 @__tgt_target_teams(i64 -1, i8* @{{.+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 0, i32 0)
+// CHECK: call void [[TARGET_OUTLINE:@.+]]()
+// CHECK: ret void
+#pragma omp target teams distribute parallel for order(concurrent)
+  for(int i = 0 ; i < 100; i++) {}
+}
+
+// CHECK: define internal void [[TARGET_OUTLINE]]()
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @{{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[TEAMS_OUTLINE:@.+]] to void (i32*, i32*, ...)*))
+// CHECK: ret void
+
+// CHECK: define internal void [[TEAMS_OUTLINE]](i32* {{.+}}, i32* {{.+}})
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-NOT: {{store|load}}{{.+}}!llvm.access.group !
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* [[PARALLEL_OUTLINE:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}})
+// CHECK-NOT: {{store|load}}{{.+}}!llvm.access.group !
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal void [[PARALLEL_OUTLINE]](i32* {{.+}}, i32* {{.+}}, i64 {{.+}}, i64 {{.+}})
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: {{store|load}}{{.+}}!llvm.access.group ![[AG:[0-9]+]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: ![[AG]] = distinct !{}
+// CHECK: !{!"llvm.loop.parallel_accesses", ![[AG]]}
+#endif
-- 
2.7.4