[OPENMP] Codegen for 'num_threads' clause in 'parallel' directive.
authorAlexey Bataev <a.bataev@hotmail.com>
Mon, 13 Oct 2014 08:23:51 +0000 (08:23 +0000)
committerAlexey Bataev <a.bataev@hotmail.com>
Mon, 13 Oct 2014 08:23:51 +0000 (08:23 +0000)
This patch generates call to "kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads);" library function before calling "kmpc_fork_call" each time there is an associated "num_threads" clause in the "omp parallel" directive.
Differential Revision: http://reviews.llvm.org/D5145

llvm-svn: 219599

clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/CodeGen/CGOpenMPRuntime.h
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/lib/Sema/SemaOpenMP.cpp
clang/test/OpenMP/parallel_num_threads_codegen.cpp [new file with mode: 0644]

index 53d4b6c..1a50c94 100644 (file)
@@ -296,6 +296,16 @@ CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) {
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
     break;
   }
+  case OMPRTL__kmpc_push_num_threads: {
+    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_threads)
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+                                CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
+    break;
+  }
   case OMPRTL__kmpc_serialized_parallel: {
     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
     // global_tid);
@@ -431,3 +441,15 @@ void CGOpenMPRuntime::EmitOMPBarrierCall(CodeGenFunction &CGF,
   CGF.EmitRuntimeCall(RTLFn, Args);
 }
 
+void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF,
+                                              llvm::Value *NumThreads,
+                                              SourceLocation Loc) {
+  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
+  llvm::Value *Args[] = {
+      EmitOpenMPUpdateLocation(CGF, Loc), GetOpenMPThreadID(CGF, Loc),
+      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
+  llvm::Constant *RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction(
+      CGOpenMPRuntime::OMPRTL__kmpc_push_num_threads);
+  CGF.EmitRuntimeCall(RTLFn, Args);
+}
+
index 0437882..c4da375 100644 (file)
@@ -80,7 +80,10 @@ public:
     OMPRTL__kmpc_serialized_parallel,
     // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
     // global_tid);
-    OMPRTL__kmpc_end_serialized_parallel
+    OMPRTL__kmpc_end_serialized_parallel,
+    // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_threads);
+    OMPRTL__kmpc_push_num_threads
   };
 
 private:
@@ -250,6 +253,14 @@ public:
   ///
   virtual void EmitOMPBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   OpenMPLocationFlags Flags);
+
+  /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+  /// clause.
+  /// \param NumThreads An integer value of threads.
+  virtual void EmitOMPNumThreadsClause(CodeGenFunction &CGF,
+                                       llvm::Value *NumThreads,
+                                       SourceLocation Loc);
 };
 } // namespace CodeGen
 } // namespace clang
index a459d07..2b0fb04 100644 (file)
@@ -183,6 +183,23 @@ void CodeGenFunction::EmitOMPFirstprivateClause(
   }
 }
 
+/// \brief Emits code for OpenMP parallel directive in the parallel region.
+static void EmitOMPParallelCall(CodeGenFunction &CGF,
+                                const OMPParallelDirective &S,
+                                llvm::Value *OutlinedFn,
+                                llvm::Value *CapturedStruct) {
+  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
+    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
+    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
+    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
+                                         /*IgnoreResultAssign*/ true);
+    CGF.CGM.getOpenMPRuntime().EmitOMPNumThreadsClause(
+        CGF, NumThreads, NumThreadsClause->getLocStart());
+  }
+  CGF.CGM.getOpenMPRuntime().EmitOMPParallelCall(CGF, S.getLocStart(),
+                                                 OutlinedFn, CapturedStruct);
+}
+
 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
@@ -192,16 +209,13 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
     auto Cond = cast<OMPIfClause>(C)->getCondition();
     EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
       if (ThenBlock)
-        CGM.getOpenMPRuntime().EmitOMPParallelCall(*this, S.getLocStart(),
-                                                   OutlinedFn, CapturedStruct);
+        EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
       else
         CGM.getOpenMPRuntime().EmitOMPSerialCall(*this, S.getLocStart(),
                                                  OutlinedFn, CapturedStruct);
     });
-  } else {
-    CGM.getOpenMPRuntime().EmitOMPParallelCall(*this, S.getLocStart(),
-                                               OutlinedFn, CapturedStruct);
-  }
+  } else
+    EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
 }
 
 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
index 23f5f13..133bffa 100644 (file)
@@ -3439,7 +3439,6 @@ OMPClause *Sema::ActOnOpenMPNumThreadsClause(Expr *NumThreads,
                                              SourceLocation EndLoc) {
   Expr *ValExpr = NumThreads;
   if (!NumThreads->isValueDependent() && !NumThreads->isTypeDependent() &&
-      !NumThreads->isInstantiationDependent() &&
       !NumThreads->containsUnexpandedParameterPack()) {
     SourceLocation NumThreadsLoc = NumThreads->getLocStart();
     ExprResult Val =
diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp
new file mode 100644 (file)
index 0000000..8b1b76f
--- /dev/null
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T, int C>
+int tmain() {
+#pragma omp parallel num_threads(C)
+  foo();
+#pragma omp parallel num_threads(T(23))
+  foo();
+  return 0;
+}
+
+int main() {
+  S s(0);
+  char a = s;
+#pragma omp parallel num_threads(2)
+  foo();
+#pragma omp parallel num_threads(a)
+  foo();
+  return a + tmain<char, 5>() + tmain<S, 1>();
+}
+
+// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
+// CHECK-DAG:   [[S_ADDR:%.+]] = alloca [[S_TY]]
+// CHECK-DAG:   [[A_ADDR:%.+]] = alloca i8
+// CHECK-DAG:   [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK-DAG:   call void [[S_TY_CONSTR:@.+]]([[S_TY]]* [[S_ADDR]], [[INTPTR_T_TY]] 0)
+// CHECK:       [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* [[S_ADDR]])
+// CHECK:       store i8 [[S_CHAR_OP]], i8* [[A_ADDR]]
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 2)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       [[A_VAL:%.+]] = load i8* [[A_ADDR]]
+// CHECK:       [[RES:%.+]] = sext i8 [[A_VAL]] to i32
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       invoke [[INT_TY:i[0-9]+]] [[TMAIN_CHAR_5:@.+]]()
+// CHECK:       invoke [[INT_TY]] [[TMAIN_S_1:@.+]]()
+// CHECK:       call void [[S_TY_DESTR:@.+]]([[S_TY]]* [[S_ADDR]])
+// CHECK:       ret [[INT_TY]]
+// CHECK:       }
+
+// CHECK:       define{{.*}} [[INT_TY]] [[TMAIN_CHAR_5]]()
+// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 5)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 23)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       ret [[INT_TY]] 0
+// CHECK-NEXT:  }
+
+// CHECK:       define{{.*}} [[INT_TY]] [[TMAIN_S_1]]()
+// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 1)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       call void [[S_TY_CONSTR]]([[S_TY]]* [[S_TEMP:%.+]], [[INTPTR_T_TY]] 23)
+// CHECK:       [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP]]([[S_TY]]* [[S_TEMP]])
+// CHECK:       [[RES:%.+]] = sext {{.*}}i8 [[S_CHAR_OP]] to i32
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
+// CHECK:       call void [[S_TY_DESTR]]([[S_TY]]* [[S_TEMP]])
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       ret [[INT_TY]] 0
+// CHECK:       }
+
+#endif