From: Alexey Bataev Date: Wed, 22 Apr 2015 11:59:37 +0000 (+0000) Subject: [OPENMP] Fix use of unsigned counters in loops with zero trip count. X-Git-Tag: llvmorg-3.7.0-rc1~6260 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=62dbb979c0bd3ab34d0b60b6fedfb66321d81dcf;p=platform%2Fupstream%2Fllvm.git [OPENMP] Fix use of unsigned counters in loops with zero trip count. Patch fixes bugs in codegen for loops with unsigned counters and zero trip count. Previously preconditions for all loops were built using logic (Upper - Lower) > 0. But if the loop is a loop with zero trip count, then Upper - Lower is < 0 only for signed integer, for unsigned we're running into an underflow situation. In this patch we're using original Lower(cast(S.getIterationVariable())->getDecl()); + bool IsRegistered = PreCondScope.addPrivate(IVDecl, [&]() -> llvm::Value *{ + // Emit var without initialization. + auto VarEmission = CGF.EmitAutoVarAlloca(*IVDecl); + CGF.EmitAutoVarCleanups(VarEmission); + return VarEmission.getAllocatedAddress(); + }); + assert(IsRegistered && "counter already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + (void)PreCondScope.Privatize(); + // Initialize internal counter to 0 to calculate initial values of real + // counters. + LValue IV = CGF.EmitLValue(S.getIterationVariable()); + CGF.EmitStoreOfScalar( + llvm::ConstantInt::getNullValue( + IV.getAddress()->getType()->getPointerElementType()), + CGF.EmitLValue(S.getIterationVariable()), /*isInit=*/true); + // Get initial values of real counters. + for (auto I : S.updates()) { + CGF.EmitIgnoredExpr(I); + } + // Check that loop is executed at least one time. + CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); +} + static void EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { @@ -704,7 +736,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { // Pragma 'simd' code depends on presence of 'lastprivate'. // If present, we have to separate last iteration of the loop: // - // if (LastIteration != 0) { + // if (PreCond) { // for (IV in 0..LastIteration-1) BODY; // BODY with updates of lastprivate vars; // ; @@ -712,10 +744,28 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { // // otherwise (when there's no lastprivate): // + // if (PreCond) { // for (IV in 0..LastIteration) BODY; // ; + // } // + // Emit: if (PreCond) - begin. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + RegionCounter Cnt = CGF.getPGORegionCounter(&S); + auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); + ContBlock = CGF.createBasicBlock("simd.if.end"); + emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount()); + CGF.EmitBlock(ThenBlock); + Cnt.beginRegion(CGF.Builder); + } // Walk clauses and process safelen/lastprivate. bool SeparateIter = false; CGF.LoopStack.setParallel(); @@ -780,51 +830,28 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { } } - if (SeparateIter) { - // Emit: if (LastIteration > 0) - begin. - RegionCounter Cnt = CGF.getPGORegionCounter(&S); - auto ThenBlock = CGF.createBasicBlock("simd.if.then"); - auto ContBlock = CGF.createBasicBlock("simd.if.end"); - CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, - Cnt.getCount()); - CGF.EmitBlock(ThenBlock); - Cnt.beginRegion(CGF.Builder); - // Emit 'then' code. - { - OMPPrivateScope LoopScope(CGF); - EmitPrivateLoopCounters(CGF, LoopScope, S.counters()); - EmitPrivateLinearVars(CGF, S, LoopScope); - CGF.EmitOMPPrivateClause(S, LoopScope); - (void)LoopScope.Privatize(); - CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), - S.getCond(/*SeparateIter=*/true), S.getInc(), - [&S](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S); - CGF.EmitStopPoint(&S); - }, - [](CodeGenFunction &) {}); - CGF.EmitOMPLoopBody(S, /* SeparateIter */ true); + { + OMPPrivateScope LoopScope(CGF); + EmitPrivateLoopCounters(CGF, LoopScope, S.counters()); + EmitPrivateLinearVars(CGF, S, LoopScope); + CGF.EmitOMPPrivateClause(S, LoopScope); + (void)LoopScope.Privatize(); + CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + S.getCond(SeparateIter), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + if (SeparateIter) { + CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true); } - CGF.EmitOMPSimdFinal(S); - // Emit: if (LastIteration != 0) - end. + } + CGF.EmitOMPSimdFinal(S); + // Emit: if (PreCond) - end. + if (ContBlock) { CGF.EmitBranch(ContBlock); CGF.EmitBlock(ContBlock, true); - } else { - { - OMPPrivateScope LoopScope(CGF); - EmitPrivateLoopCounters(CGF, LoopScope, S.counters()); - EmitPrivateLinearVars(CGF, S, LoopScope); - CGF.EmitOMPPrivateClause(S, LoopScope); - (void)LoopScope.Privatize(); - CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), - S.getCond(/*SeparateIter=*/false), S.getInc(), - [&S](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S); - CGF.EmitStopPoint(&S); - }, - [](CodeGenFunction &) {}); - } - CGF.EmitOMPSimdFinal(S); } }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); @@ -1010,12 +1037,22 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Check pre-condition. { // Skip the entire loop if we don't meet the precondition. - RegionCounter Cnt = getPGORegionCounter(&S); - auto ThenBlock = createBasicBlock("omp.precond.then"); - auto ContBlock = createBasicBlock("omp.precond.end"); - EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount()); - EmitBlock(ThenBlock); - Cnt.beginRegion(Builder); + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return false; + } else { + RegionCounter Cnt = getPGORegionCounter(&S); + auto *ThenBlock = createBasicBlock("omp.precond.then"); + ContBlock = createBasicBlock("omp.precond.end"); + emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, + Cnt.getCount()); + EmitBlock(ThenBlock); + Cnt.beginRegion(Builder); + } // Emit 'then' code. { // Emit helper vars inits. @@ -1090,8 +1127,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); } // We're now done with the loop, so jump to the continuation block. - EmitBranch(ContBlock); - EmitBlock(ContBlock, true); + if (ContBlock) { + EmitBranch(ContBlock); + EmitBlock(ContBlock, true); + } } return HasLastprivateClause; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index fed0ac7..0cfb94e 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1977,6 +1977,8 @@ public: bool ShouldSubtractStep() const { return SubtractStep; } /// \brief Build the expression to calculate the number of iterations. Expr *BuildNumIterations(Scope *S, const bool LimitedType) const; + /// \brief Build the precondition expression for the loops. + Expr *BuildPreCond(Scope *S, Expr *Cond) const; /// \brief Build reference expression to the counter be used for codegen. Expr *BuildCounterVar() const; /// \brief Build initization of the counter be used for codegen. @@ -2380,6 +2382,19 @@ OpenMPIterationSpaceChecker::BuildNumIterations(Scope *S, return Diff.get(); } +Expr *OpenMPIterationSpaceChecker::BuildPreCond(Scope *S, Expr *Cond) const { + // Try to build LB UB, where is <, >, <=, or >=. + bool Suppress = SemaRef.getDiagnostics().getSuppressAllDiagnostics(); + SemaRef.getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true); + auto CondExpr = SemaRef.BuildBinOp( + S, DefaultLoc, TestIsLessOp ? (TestIsStrictOp ? BO_LT : BO_LE) + : (TestIsStrictOp ? BO_GT : BO_GE), + LB, UB); + SemaRef.getDiagnostics().setSuppressAllDiagnostics(Suppress); + // Otherwise use original loop conditon and evaluate it in runtime. + return CondExpr.isUsable() ? CondExpr.get() : Cond; +} + /// \brief Build reference expression to the counter be used for codegen. Expr *OpenMPIterationSpaceChecker::BuildCounterVar() const { return DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(), @@ -2395,6 +2410,8 @@ Expr *OpenMPIterationSpaceChecker::BuildCounterStep() const { return Step; } /// \brief Iteration space of a single for loop. struct LoopIterationSpace { + /// \brief Condition of the loop. + Expr *PreCond; /// \brief This expression calculates the number of iterations in the loop. /// It is always possible to calculate it before starting the loop. Expr *NumIterations; @@ -2535,6 +2552,7 @@ static bool CheckOpenMPIterationSpace( return HasErrors; // Build the loop's iteration space representation. + ResultIterSpace.PreCond = ISC.BuildPreCond(DSA.getCurScope(), For->getCond()); ResultIterSpace.NumIterations = ISC.BuildNumIterations( DSA.getCurScope(), /* LimitedType */ isOpenMPWorksharingDirective(DKind)); ResultIterSpace.CounterVar = ISC.BuildCounterVar(); @@ -2545,7 +2563,8 @@ static bool CheckOpenMPIterationSpace( ResultIterSpace.IncSrcRange = ISC.GetIncrementSrcRange(); ResultIterSpace.Subtract = ISC.ShouldSubtractStep(); - HasErrors |= (ResultIterSpace.NumIterations == nullptr || + HasErrors |= (ResultIterSpace.PreCond == nullptr || + ResultIterSpace.NumIterations == nullptr || ResultIterSpace.CounterVar == nullptr || ResultIterSpace.CounterInit == nullptr || ResultIterSpace.CounterStep == nullptr); @@ -2690,6 +2709,9 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr, // Last iteration number is (I1 * I2 * ... In) - 1, where I1, I2 ... In are // the iteration counts of the collapsed for loops. + // Precondition tests if there is at least one iteration (all conditions are + // true). + auto PreCond = ExprResult(IterSpaces[0].PreCond); auto N0 = IterSpaces[0].NumIterations; ExprResult LastIteration32 = WidenIterationCount(32 /* Bits */, N0, SemaRef); ExprResult LastIteration64 = WidenIterationCount(64 /* Bits */, N0, SemaRef); @@ -2702,6 +2724,10 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr, Scope *CurScope = DSA.getCurScope(); for (unsigned Cnt = 1; Cnt < NestedLoopCount; ++Cnt) { + if (PreCond.isUsable()) { + PreCond = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LAnd, + PreCond.get(), IterSpaces[Cnt].PreCond); + } auto N = IterSpaces[Cnt].NumIterations; AllCountsNeedLessThan32Bits &= C.getTypeSize(N->getType()) < 32; if (LastIteration32.isUsable()) @@ -2763,11 +2789,6 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr, SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin(); - // Precondition tests if there is at least one iteration (LastIteration > 0). - ExprResult PreCond = SemaRef.BuildBinOp( - CurScope, InitLoc, BO_GT, LastIteration.get(), - SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get()); - QualType VType = LastIteration.get()->getType(); // Build variables passed into runtime, nesessary for worksharing directives. ExprResult LB, UB, IL, ST, EUB; diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp index a53af80..85f1ba6 100644 --- a/clang/test/OpenMP/for_codegen.cpp +++ b/clang/test/OpenMP/for_codegen.cpp @@ -315,6 +315,31 @@ void runtime(float *a, float *b, float *c, float *d) { // CHECK: ret void } +// CHECK-LABEL: test_precond +void test_precond() { + // CHECK: [[A_ADDR:%.+]] = alloca i8, + // CHECK: [[I_ADDR:%.+]] = alloca i8, + char a = 0; + // CHECK: store i32 0, i32* [[IV_ADDR:%.+]], + // CHECK: [[A:%.+]] = load i8, i8* [[A_ADDR]], + // CHECK: [[CONV:%.+]] = sext i8 [[A]] to i32 + // CHECK: [[IV:%.+]] = load i32, i32* [[IV_ADDR]], + // CHECK: [[MUL:%.+]] = mul nsw i32 [[IV]], 1 + // CHECK: [[ADD:%.+]] = add nsw i32 [[CONV]], [[MUL]] + // CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 + // CHECK: store i8 [[CONV]], i8* [[I_ADDR]], + // CHECK: [[A:%.+]] = load i8, i8* [[A_ADDR]], + // CHECK: [[CONV:%.+]] = sext i8 [[A]] to i32 + // CHECK: [[CMP:%.+]] = icmp slt i32 [[CONV]], 10 + // CHECK: br i1 [[CMP]], label %[[PRECOND_THEN:[^,]+]], label %[[PRECOND_END:[^,]+]] + // CHECK: [[PRECOND_THEN]] + // CHECK: call void @__kmpc_for_static_init_4 +#pragma omp for + for(char i = a; i < 10; ++i); + // CHECK: call void @__kmpc_for_static_fini + // CHECK: [[PRECOND_END]] +} + // TERM_DEBUG-LABEL: foo int foo() {return 0;}; diff --git a/clang/test/OpenMP/for_firstprivate_codegen.cpp b/clang/test/OpenMP/for_firstprivate_codegen.cpp index b2fe730..0baec9c 100644 --- a/clang/test/OpenMP/for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/for_firstprivate_codegen.cpp @@ -40,7 +40,7 @@ T tmain() { S var(3); #pragma omp parallel #pragma omp for firstprivate(t_var, vec, s_arr, var) - for (int i = 0; i < 0; ++i) { + for (int i = 0; i < 2; ++i) { vec[i] = t_var; s_arr[i] = var; } @@ -146,7 +146,7 @@ int main() { return 0; #else #pragma omp for firstprivate(t_var, vec, s_arr, var) - for (int i = 0; i < 0; ++i) { + for (int i = 0; i < 2; ++i) { vec[i] = t_var; s_arr[i] = var; } diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp index b9e23ce..7d3337a 100644 --- a/clang/test/OpenMP/for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp @@ -67,12 +67,14 @@ int main() { // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]], - // LAMBDA: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) + // LAMBDA: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}} + // LAMBDA: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] + // LAMBDA: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]] // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) - // LAMBDA: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}}) + // LAMBDA: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]]) g = 1; // Check for final copying of private values back to original vars. // LAMBDA: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]], @@ -86,8 +88,6 @@ int main() { // LAMBDA: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G]], // LAMBDA: br label %[[LAST_DONE]] // LAMBDA: [[LAST_DONE]] - // LAMBDA: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}} - // LAMBDA: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] // LAMBDA: call i32 @__kmpc_cancel_barrier(%{{.+}}* @{{.+}}, i{{[0-9]+}} [[GTID]]) [&]() { // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) @@ -119,13 +119,15 @@ int main() { // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]], - // BLOCKS: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) + // BLOCKS: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}} + // BLOCKS: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] + // BLOCKS: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], // BLOCKS-NOT: [[G]]{{[[^:word:]]}} // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]] // BLOCKS-NOT: [[G]]{{[[^:word:]]}} // BLOCKS: call void {{%.+}}(i8 - // BLOCKS: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}}) + // BLOCKS: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]]) g = 1; // Check for final copying of private values back to original vars. // BLOCKS: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]], @@ -139,8 +141,6 @@ int main() { // BLOCKS: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G]], // BLOCKS: br label %[[LAST_DONE]] // BLOCKS: [[LAST_DONE]] - // BLOCKS: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}} - // BLOCKS: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] // BLOCKS: call i32 @__kmpc_cancel_barrier(%{{.+}}* @{{.+}}, i{{[0-9]+}} [[GTID]]) g = 1; ^{ @@ -261,9 +261,11 @@ int main() { // Check for default initialization. // CHECK-NOT: [[X_PRIV]] -// CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) +// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]] +// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] +// CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) // -// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}}) +// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]]) // Check for final copying of private values back to original vars. // CHECK: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]], @@ -278,8 +280,7 @@ int main() { // CHECK-NEXT: br label %[[LAST_DONE]] // CHECK: [[LAST_DONE]] -// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]] -// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] +// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) // CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) // CHECK: ret void diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp index 63ae11b..3942fad 100644 --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -47,11 +47,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br label %{{.+}} } // CHECK: [[LOOP1_END]] -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]]) // CHECK: ret void } @@ -95,11 +91,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br label %{{.+}} } // CHECK: [[LOOP1_END]] -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]]) // CHECK: ret void } @@ -162,11 +154,7 @@ void static_chunked(float *a, float *b, float *c, float *d) { // CHECK-NEXT: store i32 [[ADD_UB]], i32* [[OMP_UB]] // CHECK: [[O_LOOP1_END]] -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]]) // CHECK: ret void } @@ -181,8 +169,6 @@ void dynamic1(float *a, float *b, float *c, float *d) { // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 35, i64 0, i64 16908287, i64 1, i64 1) // -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]]) // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]] @@ -214,8 +200,6 @@ void dynamic1(float *a, float *b, float *c, float *d) { } // CHECK: [[LOOP1_END]] // CHECK: [[O_LOOP1_END]] -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]]) // CHECK: ret void } @@ -230,8 +214,6 @@ void guided7(float *a, float *b, float *c, float *d) { // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 36, i64 0, i64 16908287, i64 1, i64 7) // -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]]) // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]] @@ -263,8 +245,6 @@ void guided7(float *a, float *b, float *c, float *d) { } // CHECK: [[LOOP1_END]] // CHECK: [[O_LOOP1_END]] -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]]) // CHECK: ret void } @@ -332,8 +312,6 @@ void runtime(float *a, float *b, float *c, float *d) { // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 37, i32 0, i32 199, i32 1, i32 1) // -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]]) // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]] diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp index 0111a27..c8998eb 100644 --- a/clang/test/OpenMP/simd_codegen.cpp +++ b/clang/test/OpenMP/simd_codegen.cpp @@ -176,28 +176,10 @@ void simple(float *a, float *b, float *c, float *d) { } // CHECK: [[SIMPLE_LOOP5_END]] +// CHECK-NOT: mul i32 %{{.+}}, 10 #pragma omp simd -// FIXME: I think we would get wrong result using 'unsigned' in the loop below. -// So we'll need to add zero trip test for 'unsigned' counters. -// -// CHECK: store i32 0, i32* [[OMP_IV6:%[^,]+]] - -// CHECK: [[IV6:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID:[0-9]+]] -// CHECK-NEXT: [[CMP6:%.+]] = icmp slt i32 [[IV6]], -8 -// CHECK-NEXT: br i1 [[CMP6]], label %[[SIMPLE_LOOP6_BODY:.+]], label %[[SIMPLE_LOOP6_END:[^,]+]] - for (int i=100; i<10; i+=10) { -// CHECK: [[SIMPLE_LOOP6_BODY]] -// Start of body: calculate i from IV: -// CHECK: [[IV6_0:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]] -// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV6_0]], 10 -// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 100, [[LC_IT_1]] -// CHECK-NEXT: store i32 [[LC_IT_2]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]] - -// CHECK: [[IV6_2:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]] -// CHECK-NEXT: [[ADD6_2:%.+]] = add nsw i32 [[IV6_2]], 1 -// CHECK-NEXT: store i32 [[ADD6_2]], i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]] + for (unsigned i=100; i<10; i+=10) { } -// CHECK: [[SIMPLE_LOOP6_END]] int A; #pragma omp simd lastprivate(A) @@ -205,8 +187,6 @@ void simple(float *a, float *b, float *c, float *d) { // Test checks that one iteration is separated in presence of lastprivate. // // CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]] -// CHECK: br i1 true, label %[[SIMPLE_IF7_THEN:.+]], label %[[SIMPLE_IF7_END:[^,]+]] -// CHECK: [[SIMPLE_IF7_THEN]] // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]] // CHECK: [[SIMD_LOOP7_COND]] // CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]] @@ -233,9 +213,6 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[LOAD_I:%.+]] = load i64, i64* [[ADDR_I]] // CHECK-NEXT: [[CONV_I:%.+]] = trunc i64 [[LOAD_I]] to i32 // -// CHECK: br label %[[SIMPLE_IF7_END]] -// CHECK: [[SIMPLE_IF7_END]] -// // CHECK: ret void }