[OPENMP] Fix use of unsigned counters in loops with zero trip count.

author Alexey Bataev <a.bataev@hotmail.com>

Wed, 22 Apr 2015 11:59:37 +0000 (11:59 +0000)

committer Alexey Bataev <a.bataev@hotmail.com>

Wed, 22 Apr 2015 11:59:37 +0000 (11:59 +0000)
author Alexey Bataev <a.bataev@hotmail.com>
Wed, 22 Apr 2015 11:59:37 +0000 (11:59 +0000)
committer Alexey Bataev <a.bataev@hotmail.com>
Wed, 22 Apr 2015 11:59:37 +0000 (11:59 +0000)
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp

index 9451802..c83dda2 100644 (file)
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -680,6 +680,38 @@ static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
    }
  }
  
+static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
+                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
+                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
+  CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
+  EmitPrivateLoopCounters(CGF, PreCondScope, S.counters());
+  const VarDecl *IVDecl =
+      cast<VarDecl>(cast<DeclRefExpr>(S.getIterationVariable())->getDecl());
+  bool IsRegistered = PreCondScope.addPrivate(IVDecl, [&]() -> llvm::Value *{
+    // Emit var without initialization.
+    auto VarEmission = CGF.EmitAutoVarAlloca(*IVDecl);
+    CGF.EmitAutoVarCleanups(VarEmission);
+    return VarEmission.getAllocatedAddress();
+  });
+  assert(IsRegistered && "counter already registered as private");
+  // Silence the warning about unused variable.
+  (void)IsRegistered;
+  (void)PreCondScope.Privatize();
+  // Initialize internal counter to 0 to calculate initial values of real
+  // counters.
+  LValue IV = CGF.EmitLValue(S.getIterationVariable());
+  CGF.EmitStoreOfScalar(
+      llvm::ConstantInt::getNullValue(
+          IV.getAddress()->getType()->getPointerElementType()),
+      CGF.EmitLValue(S.getIterationVariable()), /*isInit=*/true);
+  // Get initial values of real counters.
+  for (auto I : S.updates()) {
+    CGF.EmitIgnoredExpr(I);
+  }
+  // Check that loop is executed at least one time.
+  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
+}
+
  static void
  EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                        CodeGenFunction::OMPPrivateScope &PrivateScope) {
@@ -704,7 +736,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
      // Pragma 'simd' code depends on presence of 'lastprivate'.
      // If present, we have to separate last iteration of the loop:
      //
-    // if (LastIteration != 0) {
+    // if (PreCond) {
      //   for (IV in 0..LastIteration-1) BODY;
      //   BODY with updates of lastprivate vars;
      //   <Final counter/linear vars updates>;
@@ -712,10 +744,28 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
      //
      // otherwise (when there's no lastprivate):
      //
+    // if (PreCond) {
      //   for (IV in 0..LastIteration) BODY;
      //   <Final counter/linear vars updates>;
+    // }
      //
  
+    // Emit: if (PreCond) - begin.
+    // If the condition constant folds and can be elided, avoid emitting the
+    // whole loop.
+    bool CondConstant;
+    llvm::BasicBlock *ContBlock = nullptr;
+    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+      if (!CondConstant)
+        return;
+    } else {
+      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
+      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
+      ContBlock = CGF.createBasicBlock("simd.if.end");
+      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
+      CGF.EmitBlock(ThenBlock);
+      Cnt.beginRegion(CGF.Builder);
+    }
      // Walk clauses and process safelen/lastprivate.
      bool SeparateIter = false;
      CGF.LoopStack.setParallel();
@@ -780,51 +830,28 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
          }
      }
  
-    if (SeparateIter) {
-      // Emit: if (LastIteration > 0) - begin.
-      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
-      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
-      auto ContBlock = CGF.createBasicBlock("simd.if.end");
-      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
-                               Cnt.getCount());
-      CGF.EmitBlock(ThenBlock);
-      Cnt.beginRegion(CGF.Builder);
-      // Emit 'then' code.
-      {
-        OMPPrivateScope LoopScope(CGF);
-        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
-        EmitPrivateLinearVars(CGF, S, LoopScope);
-        CGF.EmitOMPPrivateClause(S, LoopScope);
-        (void)LoopScope.Privatize();
-        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                             S.getCond(/*SeparateIter=*/true), S.getInc(),
-                             [&S](CodeGenFunction &CGF) {
-                               CGF.EmitOMPLoopBody(S);
-                               CGF.EmitStopPoint(&S);
-                             },
-                             [](CodeGenFunction &) {});
-        CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
+    {
+      OMPPrivateScope LoopScope(CGF);
+      EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
+      EmitPrivateLinearVars(CGF, S, LoopScope);
+      CGF.EmitOMPPrivateClause(S, LoopScope);
+      (void)LoopScope.Privatize();
+      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                           S.getCond(SeparateIter), S.getInc(),
+                           [&S](CodeGenFunction &CGF) {
+                             CGF.EmitOMPLoopBody(S);
+                             CGF.EmitStopPoint(&S);
+                           },
+                           [](CodeGenFunction &) {});
+      if (SeparateIter) {
+        CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true);
        }
-      CGF.EmitOMPSimdFinal(S);
-      // Emit: if (LastIteration != 0) - end.
+    }
+    CGF.EmitOMPSimdFinal(S);
+    // Emit: if (PreCond) - end.
+    if (ContBlock) {
        CGF.EmitBranch(ContBlock);
        CGF.EmitBlock(ContBlock, true);
-    } else {
-      {
-        OMPPrivateScope LoopScope(CGF);
-        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
-        EmitPrivateLinearVars(CGF, S, LoopScope);
-        CGF.EmitOMPPrivateClause(S, LoopScope);
-        (void)LoopScope.Privatize();
-        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                             S.getCond(/*SeparateIter=*/false), S.getInc(),
-                             [&S](CodeGenFunction &CGF) {
-                               CGF.EmitOMPLoopBody(S);
-                               CGF.EmitStopPoint(&S);
-                             },
-                             [](CodeGenFunction &) {});
-      }
-      CGF.EmitOMPSimdFinal(S);
      }
    };
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
@@ -1010,12 +1037,22 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
    // Check pre-condition.
    {
      // Skip the entire loop if we don't meet the precondition.
-    RegionCounter Cnt = getPGORegionCounter(&S);
-    auto ThenBlock = createBasicBlock("omp.precond.then");
-    auto ContBlock = createBasicBlock("omp.precond.end");
-    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
-    EmitBlock(ThenBlock);
-    Cnt.beginRegion(Builder);
+    // If the condition constant folds and can be elided, avoid emitting the
+    // whole loop.
+    bool CondConstant;
+    llvm::BasicBlock *ContBlock = nullptr;
+    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+      if (!CondConstant)
+        return false;
+    } else {
+      RegionCounter Cnt = getPGORegionCounter(&S);
+      auto *ThenBlock = createBasicBlock("omp.precond.then");
+      ContBlock = createBasicBlock("omp.precond.end");
+      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
+                  Cnt.getCount());
+      EmitBlock(ThenBlock);
+      Cnt.beginRegion(Builder);
+    }
      // Emit 'then' code.
      {
        // Emit helper vars inits.
@@ -1090,8 +1127,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
              S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
      }
      // We're now done with the loop, so jump to the continuation block.
-    EmitBranch(ContBlock);
-    EmitBlock(ContBlock, true);
+    if (ContBlock) {
+      EmitBranch(ContBlock);
+      EmitBlock(ContBlock, true);
+    }
    }
    return HasLastprivateClause;
  }
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp

index fed0ac7..0cfb94e 100644 (file)
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -1977,6 +1977,8 @@ public:
    bool ShouldSubtractStep() const { return SubtractStep; }
    /// \brief Build the expression to calculate the number of iterations.
    Expr *BuildNumIterations(Scope *S, const bool LimitedType) const;
+  /// \brief Build the precondition expression for the loops.
+  Expr *BuildPreCond(Scope *S, Expr *Cond) const;
    /// \brief Build reference expression to the counter be used for codegen.
    Expr *BuildCounterVar() const;
    /// \brief Build initization of the counter be used for codegen.
@@ -2380,6 +2382,19 @@ OpenMPIterationSpaceChecker::BuildNumIterations(Scope *S,
    return Diff.get();
  }
  
+Expr *OpenMPIterationSpaceChecker::BuildPreCond(Scope *S, Expr *Cond) const {
+  // Try to build LB <op> UB, where <op> is <, >, <=, or >=.
+  bool Suppress = SemaRef.getDiagnostics().getSuppressAllDiagnostics();
+  SemaRef.getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true);
+  auto CondExpr = SemaRef.BuildBinOp(
+      S, DefaultLoc, TestIsLessOp ? (TestIsStrictOp ? BO_LT : BO_LE)
+                                  : (TestIsStrictOp ? BO_GT : BO_GE),
+      LB, UB);
+  SemaRef.getDiagnostics().setSuppressAllDiagnostics(Suppress);
+  // Otherwise use original loop conditon and evaluate it in runtime.
+  return CondExpr.isUsable() ? CondExpr.get() : Cond;
+}
+
  /// \brief Build reference expression to the counter be used for codegen.
  Expr *OpenMPIterationSpaceChecker::BuildCounterVar() const {
    return DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(),
@@ -2395,6 +2410,8 @@ Expr *OpenMPIterationSpaceChecker::BuildCounterStep() const { return Step; }
  
  /// \brief Iteration space of a single for loop.
  struct LoopIterationSpace {
+  /// \brief Condition of the loop.
+  Expr *PreCond;
    /// \brief This expression calculates the number of iterations in the loop.
    /// It is always possible to calculate it before starting the loop.
    Expr *NumIterations;
@@ -2535,6 +2552,7 @@ static bool CheckOpenMPIterationSpace(
      return HasErrors;
  
    // Build the loop's iteration space representation.
+  ResultIterSpace.PreCond = ISC.BuildPreCond(DSA.getCurScope(), For->getCond());
    ResultIterSpace.NumIterations = ISC.BuildNumIterations(
        DSA.getCurScope(), /* LimitedType */ isOpenMPWorksharingDirective(DKind));
    ResultIterSpace.CounterVar = ISC.BuildCounterVar();
@@ -2545,7 +2563,8 @@ static bool CheckOpenMPIterationSpace(
    ResultIterSpace.IncSrcRange = ISC.GetIncrementSrcRange();
    ResultIterSpace.Subtract = ISC.ShouldSubtractStep();
  
-  HasErrors |= (ResultIterSpace.NumIterations == nullptr ||
+  HasErrors |= (ResultIterSpace.PreCond == nullptr ||
+                ResultIterSpace.NumIterations == nullptr ||
                  ResultIterSpace.CounterVar == nullptr ||
                  ResultIterSpace.CounterInit == nullptr ||
                  ResultIterSpace.CounterStep == nullptr);
@@ -2690,6 +2709,9 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
  
    // Last iteration number is (I1 * I2 * ... In) - 1, where I1, I2 ... In are
    // the iteration counts of the collapsed for loops.
+  // Precondition tests if there is at least one iteration (all conditions are
+  // true).
+  auto PreCond = ExprResult(IterSpaces[0].PreCond);
    auto N0 = IterSpaces[0].NumIterations;
    ExprResult LastIteration32 = WidenIterationCount(32 /* Bits */, N0, SemaRef);
    ExprResult LastIteration64 = WidenIterationCount(64 /* Bits */, N0, SemaRef);
@@ -2702,6 +2724,10 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
  
    Scope *CurScope = DSA.getCurScope();
    for (unsigned Cnt = 1; Cnt < NestedLoopCount; ++Cnt) {
+    if (PreCond.isUsable()) {
+      PreCond = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LAnd,
+                                   PreCond.get(), IterSpaces[Cnt].PreCond);
+    }
      auto N = IterSpaces[Cnt].NumIterations;
      AllCountsNeedLessThan32Bits &= C.getTypeSize(N->getType()) < 32;
      if (LastIteration32.isUsable())
@@ -2763,11 +2789,6 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
  
    SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin();
  
-  // Precondition tests if there is at least one iteration (LastIteration > 0).
-  ExprResult PreCond = SemaRef.BuildBinOp(
-      CurScope, InitLoc, BO_GT, LastIteration.get(),
-      SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get());
-
    QualType VType = LastIteration.get()->getType();
    // Build variables passed into runtime, nesessary for worksharing directives.
    ExprResult LB, UB, IL, ST, EUB;
diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp

index a53af80..85f1ba6 100644 (file)
--- a/clang/test/OpenMP/for_codegen.cpp
+++ b/clang/test/OpenMP/for_codegen.cpp
@@ -315,6 +315,31 @@ void runtime(float *a, float *b, float *c, float *d) {
  // CHECK: ret void
  }
  
+// CHECK-LABEL: test_precond
+void test_precond() {
+  // CHECK: [[A_ADDR:%.+]] = alloca i8,
+  // CHECK: [[I_ADDR:%.+]] = alloca i8,
+  char a = 0;
+  // CHECK: store i32 0, i32* [[IV_ADDR:%.+]],
+  // CHECK: [[A:%.+]] = load i8, i8* [[A_ADDR]],
+  // CHECK: [[CONV:%.+]] = sext i8 [[A]] to i32
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_ADDR]],
+  // CHECK: [[MUL:%.+]] = mul nsw i32 [[IV]], 1
+  // CHECK: [[ADD:%.+]] = add nsw i32 [[CONV]], [[MUL]]
+  // CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+  // CHECK: store i8 [[CONV]], i8* [[I_ADDR]],
+  // CHECK: [[A:%.+]] = load i8, i8* [[A_ADDR]],
+  // CHECK: [[CONV:%.+]] = sext i8 [[A]] to i32
+  // CHECK: [[CMP:%.+]] = icmp slt i32 [[CONV]], 10
+  // CHECK: br i1 [[CMP]], label %[[PRECOND_THEN:[^,]+]], label %[[PRECOND_END:[^,]+]]
+  // CHECK: [[PRECOND_THEN]]
+  // CHECK: call void @__kmpc_for_static_init_4
+#pragma omp for
+  for(char i = a; i < 10; ++i);
+  // CHECK: call void @__kmpc_for_static_fini
+  // CHECK: [[PRECOND_END]]
+}
+
  // TERM_DEBUG-LABEL: foo
  int foo() {return 0;};
  
diff --git a/clang/test/OpenMP/for_firstprivate_codegen.cpp b/clang/test/OpenMP/for_firstprivate_codegen.cpp

index b2fe730..0baec9c 100644 (file)
--- a/clang/test/OpenMP/for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/for_firstprivate_codegen.cpp
@@ -40,7 +40,7 @@ T tmain() {
    S<T> var(3);
  #pragma omp parallel
  #pragma omp for firstprivate(t_var, vec, s_arr, var)
-  for (int i = 0; i < 0; ++i) {
+  for (int i = 0; i < 2; ++i) {
      vec[i] = t_var;
      s_arr[i] = var;
    }
@@ -146,7 +146,7 @@ int main() {
    return 0;
  #else
  #pragma omp for firstprivate(t_var, vec, s_arr, var)
-  for (int i = 0; i < 0; ++i) {
+  for (int i = 0; i < 2; ++i) {
      vec[i] = t_var;
      s_arr[i] = var;
    }
diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp

index b9e23ce..7d3337a 100644 (file)
--- a/clang/test/OpenMP/for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp
@@ -67,12 +67,14 @@ int main() {
      // LAMBDA: alloca i{{[0-9]+}},
      // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
      // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
-    // LAMBDA: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
+    // LAMBDA: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
+    // LAMBDA: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+    // LAMBDA: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
      // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
      // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
      // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]]
      // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
-    // LAMBDA: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
+    // LAMBDA: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
      g = 1;
      // Check for final copying of private values back to original vars.
      // LAMBDA: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]],
@@ -86,8 +88,6 @@ int main() {
      // LAMBDA: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G]],
      // LAMBDA: br label %[[LAST_DONE]]
      // LAMBDA: [[LAST_DONE]]
-    // LAMBDA: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
-    // LAMBDA: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
      // LAMBDA: call i32 @__kmpc_cancel_barrier(%{{.+}}* @{{.+}}, i{{[0-9]+}} [[GTID]])
      [&]() {
        // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
@@ -119,13 +119,15 @@ int main() {
      // BLOCKS: alloca i{{[0-9]+}},
      // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
      // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
-    // BLOCKS: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
+    // BLOCKS: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
+    // BLOCKS: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+    // BLOCKS: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
      // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
      // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
      // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
      // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
      // BLOCKS: call void {{%.+}}(i8
-    // BLOCKS: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
+    // BLOCKS: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
      g = 1;
      // Check for final copying of private values back to original vars.
      // BLOCKS: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]],
@@ -139,8 +141,6 @@ int main() {
      // BLOCKS: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G]],
      // BLOCKS: br label %[[LAST_DONE]]
      // BLOCKS: [[LAST_DONE]]
-    // BLOCKS: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
-    // BLOCKS: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
      // BLOCKS: call i32 @__kmpc_cancel_barrier(%{{.+}}* @{{.+}}, i{{[0-9]+}} [[GTID]])
      g = 1;
      ^{
@@ -261,9 +261,11 @@ int main() {
  // Check for default initialization.
  // CHECK-NOT: [[X_PRIV]]
  
-// CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+// CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
  // <Skip loop body>
-// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
  
  // Check for final copying of private values back to original vars.
  // CHECK: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]],
@@ -278,8 +280,7 @@ int main() {
  // CHECK-NEXT: br label %[[LAST_DONE]]
  // CHECK: [[LAST_DONE]]
  
-// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
  // CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
  // CHECK: ret void
  
diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp

index 63ae11b..3942fad 100644 (file)
--- a/clang/test/OpenMP/parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_codegen.cpp
@@ -47,11 +47,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
  // CHECK-NEXT: br label %{{.+}}
    }
  // CHECK: [[LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
  // CHECK: ret void
  }
@@ -95,11 +91,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
  // CHECK-NEXT: br label %{{.+}}
    }
  // CHECK: [[LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
  // CHECK: ret void
  }
@@ -162,11 +154,7 @@ void static_chunked(float *a, float *b, float *c, float *d) {
  // CHECK-NEXT: store i32 [[ADD_UB]], i32* [[OMP_UB]]
  
  // CHECK: [[O_LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
  // CHECK: ret void
  }
@@ -181,8 +169,6 @@ void dynamic1(float *a, float *b, float *c, float *d) {
  // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 35, i64 0, i64 16908287, i64 1, i64 1)
  //
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
  // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
  // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
@@ -214,8 +200,6 @@ void dynamic1(float *a, float *b, float *c, float *d) {
    }
  // CHECK: [[LOOP1_END]]
  // CHECK: [[O_LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
  // CHECK: ret void
  }
@@ -230,8 +214,6 @@ void guided7(float *a, float *b, float *c, float *d) {
  // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 36, i64 0, i64 16908287, i64 1, i64 7)
  //
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
  // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
  // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
@@ -263,8 +245,6 @@ void guided7(float *a, float *b, float *c, float *d) {
    }
  // CHECK: [[LOOP1_END]]
  // CHECK: [[O_LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
  // CHECK: ret void
  }
@@ -332,8 +312,6 @@ void runtime(float *a, float *b, float *c, float *d) {
  // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 37, i32 0, i32 199, i32 1, i32 1)
  //
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
  // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
  // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
  // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp

index 0111a27..c8998eb 100644 (file)
--- a/clang/test/OpenMP/simd_codegen.cpp
+++ b/clang/test/OpenMP/simd_codegen.cpp
@@ -176,28 +176,10 @@ void simple(float *a, float *b, float *c, float *d) {
    }
  // CHECK: [[SIMPLE_LOOP5_END]]
  
+// CHECK-NOT: mul i32 %{{.+}}, 10
    #pragma omp simd
-// FIXME: I think we would get wrong result using 'unsigned' in the loop below.
-// So we'll need to add zero trip test for 'unsigned' counters.
-//
-// CHECK: store i32 0, i32* [[OMP_IV6:%[^,]+]]
-
-// CHECK: [[IV6:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP6:%.+]] = icmp slt i32 [[IV6]], -8
-// CHECK-NEXT: br i1 [[CMP6]], label %[[SIMPLE_LOOP6_BODY:.+]], label %[[SIMPLE_LOOP6_END:[^,]+]]
-  for (int i=100; i<10; i+=10) {
-// CHECK: [[SIMPLE_LOOP6_BODY]]
-// Start of body: calculate i from IV:
-// CHECK: [[IV6_0:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
-// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV6_0]], 10
-// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 100, [[LC_IT_1]]
-// CHECK-NEXT: store i32 [[LC_IT_2]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
-
-// CHECK: [[IV6_2:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
-// CHECK-NEXT: [[ADD6_2:%.+]] = add nsw i32 [[IV6_2]], 1
-// CHECK-NEXT: store i32 [[ADD6_2]], i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
+  for (unsigned i=100; i<10; i+=10) {
    }
-// CHECK: [[SIMPLE_LOOP6_END]]
  
    int A;
    #pragma omp simd lastprivate(A)
@@ -205,8 +187,6 @@ void simple(float *a, float *b, float *c, float *d) {
  // Test checks that one iteration is separated in presence of lastprivate.
  //
  // CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]]
-// CHECK: br i1 true, label %[[SIMPLE_IF7_THEN:.+]], label %[[SIMPLE_IF7_END:[^,]+]]
-// CHECK: [[SIMPLE_IF7_THEN]]
  // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]]
  // CHECK: [[SIMD_LOOP7_COND]]
  // CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]]
@@ -233,9 +213,6 @@ void simple(float *a, float *b, float *c, float *d) {
  // CHECK: [[LOAD_I:%.+]] = load i64, i64* [[ADDR_I]]
  // CHECK-NEXT: [[CONV_I:%.+]] = trunc i64 [[LOAD_I]] to i32
  //
-// CHECK: br label %[[SIMPLE_IF7_END]]
-// CHECK: [[SIMPLE_IF7_END]]
-//
  
  // CHECK: ret void
  }
author	Alexey Bataev <a.bataev@hotmail.com>
	Wed, 22 Apr 2015 11:59:37 +0000 (11:59 +0000)
committer	Alexey Bataev <a.bataev@hotmail.com>
	Wed, 22 Apr 2015 11:59:37 +0000 (11:59 +0000)
clang/lib/CodeGen/CGStmtOpenMP.cpp		patch \| blob \| history
clang/lib/Sema/SemaOpenMP.cpp		patch \| blob \| history
clang/test/OpenMP/for_codegen.cpp		patch \| blob \| history
clang/test/OpenMP/for_firstprivate_codegen.cpp		patch \| blob \| history
clang/test/OpenMP/for_lastprivate_codegen.cpp		patch \| blob \| history
clang/test/OpenMP/parallel_for_codegen.cpp		patch \| blob \| history
clang/test/OpenMP/simd_codegen.cpp		patch \| blob \| history