[OPENMP] Initial codegen for 'omp sections' and 'omp section' directives.
authorAlexey Bataev <a.bataev@hotmail.com>
Thu, 12 Mar 2015 08:53:29 +0000 (08:53 +0000)
committerAlexey Bataev <a.bataev@hotmail.com>
Thu, 12 Mar 2015 08:53:29 +0000 (08:53 +0000)
If only one section is found in the sections region, it is emitted just like single region.
Otherwise it is emitted as a static non-chunked loop.

#pragma omp sections
{
#pragma omp section
  {1}
  ...
  #pragma omp section
  {n}
}
is translated to something like

i32 <iter_var>
i32 <last_iter> = 0
i32 <lower_bound> = 0
i32 <upper_bound> = n-1
i32 <stride> = 1
call void @__kmpc_for_static_init_4(<loc>, i32 <gtid>, i32 34/*static non-chunked*/, i32* <last_iter>, i32* <lower_bound>, i32* <upper_bound>, i32* <stride>, i32 1/*increment always 1*/, i32 1/*chunk always 1*/)
<upper_bound> = min(<upper_bound>, n-1)
<iter_var> = <lb>
check:
br <iter_var> <= <upper_bound>, label cont, label exit
continue:
switch (IV) {
  case 0:
  {1};
  break;
  ...
  case <NumSection> - 1:
  {n};
  break;
  }
  ++<iter_var>
  br label check
  exit:
  call void @__kmpc_for_static_fini(<loc>, i32 <gtid>)
Differential Revision: http://reviews.llvm.org/D8244

llvm-svn: 232021

clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/test/OpenMP/sections_codegen.cpp [new file with mode: 0644]

index ed68155..0a82adf 100644 (file)
@@ -282,9 +282,10 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
   }
 }
 
-void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
-                                       OMPPrivateScope &LoopScope,
-                                       bool SeparateIter) {
+void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
+                                       const Expr *LoopCond,
+                                       const Expr *IncExpr,
+                                       const std::function<void()> &BodyGen) {
   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
   auto Cnt = getPGORegionCounter(&S);
 
@@ -296,17 +297,13 @@ void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
   // If there are any cleanups between here and the loop-exit scope,
   // create a block to stage a loop exit along.
   auto ExitBlock = LoopExit.getBlock();
-  if (LoopScope.requiresCleanups())
+  if (RequiresCleanup)
     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
 
   auto LoopBody = createBasicBlock("omp.inner.for.body");
 
-  // Emit condition: "IV < LastIteration + 1 [ - 1]"
-  // ("- 1" when lastprivate clause is present - separate one iteration).
-  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter));
-  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
-                       PGO.createLoopWeights(S.getCond(SeparateIter), Cnt));
-
+  // Emit condition.
+  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
   if (ExitBlock != LoopExit.getBlock()) {
     EmitBlock(ExitBlock);
     EmitBranchThroughCleanup(LoopExit);
@@ -319,12 +316,11 @@ void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
-  EmitOMPLoopBody(S);
-  EmitStopPoint(&S);
+  BodyGen();
 
   // Emit "IV = IV + 1" and a back-edge to the condition block.
   EmitBlock(Continue.getBlock());
-  EmitIgnoredExpr(S.getInc());
+  EmitIgnoredExpr(IncExpr);
   BreakContinueStack.pop_back();
   EmitBranch(CondBlock);
   LoopStack.pop();
@@ -460,7 +456,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
     {
       OMPPrivateScope LoopScope(*this);
       EmitPrivateLoopCounters(*this, LoopScope, S.counters());
-      EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true);
+      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                       S.getCond(/*SeparateIter=*/true), S.getInc(),
+                       [&S, this]() {
+                         EmitOMPLoopBody(S);
+                         EmitStopPoint(&S);
+                       });
       EmitOMPLoopBody(S, /* SeparateIter */ true);
     }
     EmitOMPSimdFinal(S);
@@ -471,7 +472,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
     {
       OMPPrivateScope LoopScope(*this);
       EmitPrivateLoopCounters(*this, LoopScope, S.counters());
-      EmitOMPInnerLoop(S, LoopScope);
+      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                       S.getCond(/*SeparateIter=*/false), S.getInc(),
+                       [&S, this]() {
+                         EmitOMPLoopBody(S);
+                         EmitStopPoint(&S);
+                       });
     }
     EmitOMPSimdFinal(S);
   }
@@ -543,7 +549,11 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
-  EmitOMPInnerLoop(S, LoopScope);
+  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                   S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() {
+                     EmitOMPLoopBody(S);
+                     EmitStopPoint(&S);
+                   });
 
   EmitBlock(Continue.getBlock());
   BreakContinueStack.pop_back();
@@ -638,7 +648,12 @@ void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
         // IV = LB;
         EmitIgnoredExpr(S.getInit());
         // while (idx <= UB) { BODY; ++idx; }
-        EmitOMPInnerLoop(S, LoopScope);
+        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                         S.getCond(/*SeparateIter=*/false), S.getInc(),
+                         [&S, this]() {
+                           EmitOMPLoopBody(S);
+                           EmitStopPoint(&S);
+                         });
         // Tell the runtime we are done.
         RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
       } else {
@@ -669,12 +684,108 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
   llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) {
-  llvm_unreachable("CodeGen for 'omp sections' is not supported yet.");
+static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
+                                const Twine &Name,
+                                llvm::Value *Init = nullptr) {
+  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
+  if (Init)
+    CGF.EmitScalarInit(Init, LVal);
+  return LVal;
+}
+
+void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
+  InlinedOpenMPRegionScopeRAII Region(*this, S);
+
+  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
+  auto *CS = dyn_cast<CompoundStmt>(Stmt);
+  if (CS && CS->size() > 1) {
+    auto &C = CGM.getContext();
+    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+    // Emit helper vars inits.
+    LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.",
+                                  Builder.getInt32(0));
+    auto *GlobalUBVal = Builder.getInt32(CS->size() - 1);
+    LValue UB =
+        createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
+    LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.",
+                                  Builder.getInt32(1));
+    LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.",
+                                  Builder.getInt32(0));
+    // Loop counter.
+    LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv.");
+    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
+    OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV);
+    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
+    OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB);
+    // Generate condition for loop.
+    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
+                        OK_Ordinary, S.getLocStart(), /*fpContractable=*/false);
+    // Increment for loop counter.
+    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
+                      S.getLocStart());
+    auto BodyGen = [this, CS, &S, &IV]() {
+      // Iterate through all sections and emit a switch construct:
+      // switch (IV) {
+      //   case 0:
+      //     <SectionStmt[0]>;
+      //     break;
+      // ...
+      //   case <NumSection> - 1:
+      //     <SectionStmt[<NumSection> - 1]>;
+      //     break;
+      // }
+      // .omp.sections.exit:
+      auto *ExitBB = createBasicBlock(".omp.sections.exit");
+      auto *SwitchStmt = Builder.CreateSwitch(
+          EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
+          CS->size());
+      unsigned CaseNumber = 0;
+      for (auto C = CS->children(); C; ++C, ++CaseNumber) {
+        auto CaseBB = createBasicBlock(".omp.sections.case");
+        EmitBlock(CaseBB);
+        SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
+        EmitStmt(*C);
+        EmitBranch(ExitBB);
+      }
+      EmitBlock(ExitBB, /*IsFinished=*/true);
+    };
+    // Emit static non-chunked loop.
+    CGM.getOpenMPRuntime().emitForInit(
+        *this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
+        /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
+        ST.getAddress());
+    // UB = min(UB, GlobalUB);
+    auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart());
+    auto *MinUBGlobalUB = Builder.CreateSelect(
+        Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
+    EmitStoreOfScalar(MinUBGlobalUB, UB);
+    // IV = LB;
+    EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV);
+    // while (idx <= UB) { BODY; ++idx; }
+    EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
+    // Tell the runtime we are done.
+    CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(),
+                                         OMPC_SCHEDULE_static);
+  } else {
+    // If only one section is found - no need to generate loop, emit as a single
+    // region.
+    CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
+      InlinedOpenMPRegionScopeRAII Region(*this, S);
+      EmitStmt(Stmt);
+      EnsureInsertPoint();
+    }, S.getLocStart());
+  }
+
+  // Emit an implicit barrier at the end.
+  if (!S.getSingleClause(OMPC_nowait))
+    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+                                           /*IsExplicit=*/false);
 }
 
-void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) {
-  llvm_unreachable("CodeGen for 'omp section' is not supported yet.");
+void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
+  InlinedOpenMPRegionScopeRAII Region(*this, S);
+  EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+  EnsureInsertPoint();
 }
 
 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
index 2103766..4c62a49 100644 (file)
@@ -2094,8 +2094,9 @@ private:
   /// Helpers for the OpenMP loop directives.
   void EmitOMPLoopBody(const OMPLoopDirective &Directive,
                        bool SeparateIter = false);
-  void EmitOMPInnerLoop(const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
-                        bool SeparateIter = false);
+  void EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
+                        const Expr *LoopCond, const Expr *IncExpr,
+                        const std::function<void()> &BodyGen);
   void EmitOMPSimdFinal(const OMPLoopDirective &S);
   void EmitOMPWorksharingLoop(const OMPLoopDirective &S);
   void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
diff --git a/clang/test/OpenMP/sections_codegen.cpp b/clang/test/OpenMP/sections_codegen.cpp
new file mode 100644 (file)
index 0000000..9485d9f
--- /dev/null
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: foo
+void foo() {};
+// CHECK-LABEL: bar
+void bar() {};
+
+template <class T>
+T tmain() {
+#pragma omp parallel
+#pragma omp sections
+  {
+    foo();
+  }
+  return T();
+}
+
+// CHECK-LABEL: @main
+int main() {
+  float l = 0.0; // Used as a base point in checks.
+// CHECK: [[GTID:%.+]] = call{{.*}} i32 @__kmpc_global_thread_num({{.*}})
+// CHECK: store float
+#pragma omp sections nowait
+  {
+// CHECK:      store i32 0, i32* [[LB_PTR:%.+]],
+// CHECK:      store i32 1, i32* [[UB_PTR:%.+]],
+// CHECK:      call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1)
+// <<UB = min(UB, GlobalUB);>>
+// CHECK:      [[UB:%.+]] = load i32, i32* [[UB_PTR]]
+// CHECK:      [[CMP:%.+]] = icmp slt i32 [[UB]], 1
+// CHECK:      [[MIN_UB_GLOBALUB:%.+]] = select i1 [[CMP]], i32 [[UB]], i32 1
+// CHECK:      store i32 [[MIN_UB_GLOBALUB]], i32* [[UB_PTR]]
+// <<IV = LB;>>
+// CHECK:      [[LB:%.+]] = load i32, i32* [[LB_PTR]]
+// CHECK:      store i32 [[LB]], i32* [[IV_PTR:%.+]]
+// CHECK:      br label %[[INNER_FOR_COND:.+]]
+// CHECK:      [[INNER_FOR_COND]]
+// <<IV <= UB?>>
+// CHECK:      [[IV:%.+]] = load i32, i32* [[IV_PTR]]
+// CHECK:      [[UB:%.+]] = load i32, i32* [[UB_PTR]]
+// CHECK:      [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
+// CHECK:      br i1 [[CMP]], label %[[INNER_LOOP_BODY:.+]], label %[[INNER_LOOP_END:.+]]
+// CHECK:      [[INNER_LOOP_BODY]]
+// <<TRUE>> - > <BODY>
+// CHECK:      [[IV:%.+]] = load i32, i32* [[IV_PTR]]
+// CHECK:      switch i32 [[IV]], label %[[SECTIONS_EXIT:.+]] [
+// CHECK-NEXT: i32 0, label %[[SECTIONS_CASE0:.+]]
+// CHECK-NEXT: i32 1, label %[[SECTIONS_CASE1:.+]]
+#pragma omp section
+// CHECK:      [[SECTIONS_CASE0]]
+// CHECK-NEXT: invoke void @{{.*}}foo{{.*}}()
+// CHECK:      br label %[[SECTIONS_EXIT]]
+    foo();
+#pragma omp section
+// CHECK:      [[SECTIONS_CASE1]]
+// CHECK-NEXT: invoke void @{{.*}}bar{{.*}}()
+// CHECK:      br label %[[SECTIONS_EXIT]]
+    bar();
+// CHECK:      [[SECTIONS_EXIT]]
+// <<++IV;>>
+// CHECK:      [[IV:%.+]] = load i32, i32* [[IV_PTR]]
+// CHECK-NEXT: [[INC:%.+]] = add nsw i32 [[IV]], 1
+// CHECK-NEXT: store i32 [[INC]], i32* [[IV_PTR]]
+// CHECK-NEXT: br label %[[INNER_FOR_COND]]
+// CHECK:      [[INNER_LOOP_END]]
+  }
+// CHECK:      call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK-NOT:  __kmpc_cancel_barrier
+  return tmain<int>();
+}
+
+// CHECK-LABEL: tmain
+// CHECK:       call void {{.*}} @__kmpc_fork_call(
+// CHECK-NOT:   __kmpc_global_thread_num
+// CHECK:       [[RES:%.+]] = call i32 @__kmpc_single(
+// CHECK-NEXT:  [[BOOLRES:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[BOOLRES]], label %[[THEN:.+]], label %[[END:.+]]
+// CHECK:       [[THEN]]
+// CHECK-NEXT:  invoke void @{{.*}}foo{{.*}}()
+// CHECK-NEXT:  unwind label %[[TERM_LPAD:.+]]
+// CHECK:       call void @__kmpc_end_single(
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+// CHECK-NEXT:  call i32 @__kmpc_cancel_barrier(
+// CHECK-NEXT:  ret
+// CHECK:       [[TERM_LPAD]]
+// CHECK:       call void @__clang_call_terminate(i8*
+// CHECK-NEXT:  unreachable
+
+#endif