From: Roman Lebedev
Date: Thu, 2 Sep 2021 09:40:22 +0000 (+0300)
Subject: Revert "[OpenMP][OpenMPIRBuilder] Implement loop unrolling."
X-Git-Tag: upstream/15.0.7~32439
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=50634deaa54afc08962563ccb1f0c0a6266b64b5;p=platform%2Fupstream%2Fllvm.git

Revert "[OpenMP][OpenMPIRBuilder] Implement loop unrolling."

Breaks build with -DBUILD_SHARED_LIBS=ON
```
CMake Error: The inter-target dependency graph contains the following strongly connected component (cycle):
  "LLVMFrontendOpenMP" of type SHARED_LIBRARY
    depends on "LLVMPasses" (weak)
  "LLVMipo" of type SHARED_LIBRARY
    depends on "LLVMFrontendOpenMP" (weak)
  "LLVMCoroutines" of type SHARED_LIBRARY
    depends on "LLVMipo" (weak)
  "LLVMPasses" of type SHARED_LIBRARY
    depends on "LLVMCoroutines" (weak)
    depends on "LLVMipo" (weak)
At least one of these targets is not a STATIC_LIBRARY.  Cyclic dependencies are allowed only among static libraries.
CMake Generate step failed.  Build files cannot be regenerated correctly.
```

This reverts commit 707ce34b06190e275572c3c46843036db1bab6d1.
---

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 2794b0f..7e900ec0 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10572,11 +10572,6 @@ public:
   /// an OpenMP loop directive.
   StmtResult ActOnOpenMPCanonicalLoop(Stmt *AStmt);
 
-  /// Process a canonical OpenMP loop nest that can either be a canonical
-  /// literal loop (ForStmt or CXXForRangeStmt), or the generated loop of an
-  /// OpenMP loop transformation construct.
-  StmtResult ActOnOpenMPLoopnest(Stmt *AStmt);
-
   /// End of OpenMP region.
   ///
   /// \param S Statement associated with the current OpenMP region.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index a9d1fd1..b965150 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1951,27 +1951,11 @@ llvm::CanonicalLoopInfo *
 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
   assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
 
-  // The caller is processing the loop-associated directive processing the \p
-  // Depth loops nested in \p S. Put the previous pending loop-associated
-  // directive to the stack. If the current loop-associated directive is a loop
-  // transformation directive, it will push its generated loops onto the stack
-  // such that together with the loops left here they form the combined loop
-  // nest for the parent loop-associated directive.
-  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
-  ExpectedOMPLoopDepth = Depth;
-
   EmitStmt(S);
   assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
 
   // The last added loop is the outermost one.
-  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
-
-  // Pop the \p Depth loops requested by the call from that stack and restore
-  // the previous context.
-  OMPLoopNestStack.set_size(OMPLoopNestStack.size() - Depth);
-  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
-
-  return Result;
+  return OMPLoopNestStack.back();
 }
 
 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
@@ -2601,46 +2585,6 @@ void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
 }
 
 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
-  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
-
-  if (UseOMPIRBuilder) {
-    auto DL = SourceLocToDebugLoc(S.getBeginLoc());
-    const Stmt *Inner = S.getRawStmt();
-
-    // Consume nested loop. Clear the entire remaining loop stack because a
-    // fully unrolled loop is non-transformable. For partial unrolling the
-    // generated outer loop is pushed back to the stack.
-    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
-    OMPLoopNestStack.clear();
-
-    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
-
-    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
-    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
-
-    if (S.hasClausesOfKind<OMPFullClause>()) {
-      assert(ExpectedOMPLoopDepth == 0);
-      OMPBuilder.unrollLoopFull(DL, CLI);
-    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
-      uint64_t Factor = 0;
-      if (Expr *FactorExpr = PartialClause->getFactor()) {
-        Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
-        assert(Factor >= 1 && "Only positive factors are valid");
-      }
-      OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
-                                   NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
-    } else {
-      OMPBuilder.unrollLoopHeuristic(DL, CLI);
-    }
-
-    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
-           "NeedsUnrolledCLI implies UnrolledCLI to be set");
-    if (UnrolledCLI)
-      OMPLoopNestStack.push_back(UnrolledCLI);
-
-    return;
-  }
-
   // This function is only called if the unrolled loop is not consumed by any
   // other loop-associated construct. Such a loop-associated construct will have
   // used the transformed AST.
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index b43f37c..1f05877 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -291,10 +291,6 @@ public:
   /// nest would extend.
   SmallVector OMPLoopNestStack;
 
-  /// Number of nested loop to be consumed by the last surrounding
-  /// loop-associated directive.
-  int ExpectedOMPLoopDepth = 0;
-
   // CodeGen lambda for loops and support for ordered clause
   typedef llvm::function_ref
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 5f8f984..a45168d 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2562,7 +2562,8 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
       if (AssociatedStmt.isUsable() && isOpenMPLoopDirective(DKind) &&
           getLangOpts().OpenMPIRBuilder)
-        AssociatedStmt = Actions.ActOnOpenMPLoopnest(AssociatedStmt.get());
+        AssociatedStmt =
+            Actions.ActOnOpenMPCanonicalLoop(AssociatedStmt.get());
     }
     AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses);
   } else if (DKind == OMPD_target_update || DKind == OMPD_target_enter_data ||
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 0c678e8..b6e43d3 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -5573,19 +5573,6 @@ StmtResult Sema::ActOnOpenMPCanonicalLoop(Stmt *AStmt) {
                          LoopVarFunc, LVRef);
 }
 
-StmtResult Sema::ActOnOpenMPLoopnest(Stmt *AStmt) {
-  // Handle a literal loop.
- if (isa(AStmt) || isa(AStmt)) - return ActOnOpenMPCanonicalLoop(AStmt); - - // If not a literal loop, it must be the result of a loop transformation. - OMPExecutableDirective *LoopTransform = cast(AStmt); - assert( - isOpenMPLoopTransformationDirective(LoopTransform->getDirectiveKind()) && - "Loop transformation directive expected"); - return LoopTransform; -} - static ExprResult buildUserDefinedMapperRef(Sema &SemaRef, Scope *S, CXXScopeSpec &MapperIdScopeSpec, const DeclarationNameInfo &MapperId, diff --git a/clang/test/OpenMP/irbuilder_unroll_full.c b/clang/test/OpenMP/irbuilder_unroll_full.c deleted file mode 100644 index 79d2956..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_full.c +++ /dev/null @@ -1,153 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -// CHECK-LABEL: define {{.*}}@unroll_full( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64 -// CHECK-NEXT: 
%[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } -void unroll_full(float *a, float *b, float *c, float *d) { -#pragma omp unroll full - for (int i = 0; i < 2; i++) { - a[i] = b[i] * c[i] * d[i]; - } -} - -#endif // HEADER - -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// 
CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.full"} diff --git a/clang/test/OpenMP/irbuilder_unroll_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_heuristic.c deleted file mode 100644 index 4c590b2..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_heuristic.c +++ /dev/null @@ -1,153 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -// CHECK-LABEL: define {{.*}}@unroll_heuristic( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], 
float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } - -void unroll_heuristic(float *a, float *b, 
float *c, float *d) { -#pragma omp unroll - for (int i = 0; i < 128; i++) { - a[i] = b[i] * c[i] * d[i]; - } -} - -#endif // HEADER - -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 128, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: 
![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor.c deleted file mode 100644 index 254dcc5..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor.c +++ /dev/null @@ -1,153 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -// CHECK-LABEL: define {{.*}}@unroll_partial_factor( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = 
load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } -void unroll_partial_factor(float *a, float *b, float *c, float *d) { -#pragma omp unroll partial(3) - for (int i = 0; i < 2; i++) { - a[i] = b[i] * c[i] * d[i]; - } -} - -#endif // HEADER - -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// 
CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 3} diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c deleted file mode 100644 index 5452bf0..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c +++ /dev/null @@ -1,222 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[N_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32 %[[N:.+]], i32* %[[N_ADDR]], align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* 
%[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 1 -// CHECK-NEXT: store i32* %[[N_ADDR]], i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP2:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[TMP2]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[DOTCOUNT]], 13 -// CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[DOTCOUNT]], 13 -// CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5]], 0 -// CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP4]], %[[TMP7]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP8]], i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]] -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[TMP11]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP12]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP9]] -// CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[TMP13]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14]], i32 %[[TMP5]], i32 13 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br 
label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP15]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP16:.+]] = mul nuw i32 13, %[[TMP13]] -// CHECK-NEXT: %[[TMP17:.+]] = add nuw i32 %[[TMP16]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP17]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP18:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP19]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP18]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP20:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP21:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP22:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP22]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP21]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP23:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP20]], %[[TMP23]] -// CHECK-NEXT: %[[TMP24:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP25:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP25]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP24]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP26:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP26]] -// CHECK-NEXT: %[[TMP27:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP28:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP28]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP27]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } - -void unroll_partial_heuristic_for(int 
n, float *a, float *b, float *c, float *d) { -#pragma omp for -#pragma omp unroll partial(13) - for (int i = 0; i < n; i++) { - a[i] = b[i] * c[i] * d[i]; - } -} - -#endif // HEADER - -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 1 -// CHECK-NEXT: %[[TMP5:.+]] = load i32*, i32** %[[TMP4]], align 8 -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[TMP5]], align 4 -// CHECK-NEXT: store i32 %[[TMP6]], i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP7]], %[[TMP8]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]] -// CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP12]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: 
%[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 13} diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c deleted file mode 100644 index a86a5ef..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c +++ /dev/null @@ -1,200 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for_collapse( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[M_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[DOTOMP_IV:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[TMP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[TMP1:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[J:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLLED_IV_J:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTOMP_LB:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_UB:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[I6:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLLED_IV_J7:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLL_INNER_IV_J:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32 %[[M:.+]], i32* %[[M_ADDR]], align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load i32, i32* %[[M_ADDR]], align 4 -// CHECK-NEXT: store i32 %[[TMP0]], i32* %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: store i32 0, i32* %[[J]], align 4 -// CHECK-NEXT: %[[TMP1_1:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP1_1]], 0 -// CHECK-NEXT: %[[DIV:.+]] = sdiv i32 %[[SUB]], 1 -// CHECK-NEXT: %[[CONV:.+]] = sext i32 %[[DIV]] to i64 -// CHECK-NEXT: %[[MUL:.+]] = mul nsw i64 %[[CONV]], 2 -// CHECK-NEXT: %[[SUB3:.+]] = sub nsw i64 %[[MUL]], 1 -// CHECK-NEXT: store i64 %[[SUB3]], i64* %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 0, i32* %[[DOTUNROLLED_IV_J]], align 4 -// 
CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 0, %[[TMP2]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_PRECOND_THEN]]: -// CHECK-NEXT: store i64 0, i64* %[[DOTOMP_LB]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: store i64 %[[TMP3]], i64* %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: store i64 1, i64* %[[DOTOMP_STRIDE]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3) -// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK-NEXT: %[[TMP4:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[TMP5:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP8]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: %[[TMP7:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i64 [ %[[TMP6]], %[[COND_TRUE]] ], [ %[[TMP7]], %[[COND_FALSE]] ] -// CHECK-NEXT: store i64 %[[COND]], i64* %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[TMP8:.+]] = load i64, i64* %[[DOTOMP_LB]], align 8 -// CHECK-NEXT: store i64 %[[TMP8]], i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_COND]]: -// CHECK-NEXT: %[[TMP9:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[CMP10:.+]] = icmp sle i64 %[[TMP9]], %[[TMP10]] -// CHECK-NEXT: br i1 %[[CMP10]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_BODY]]: -// CHECK-NEXT: %[[TMP11:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[DIV12:.+]] = sdiv i64 %[[TMP11]], 2 -// CHECK-NEXT: %[[MUL13:.+]] = mul nsw i64 %[[DIV12]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add nsw i64 0, %[[MUL13]] -// CHECK-NEXT: %[[CONV14:.+]] = trunc i64 %[[ADD]] to i32 -// CHECK-NEXT: store i32 %[[CONV14]], i32* %[[I6]], align 4 -// CHECK-NEXT: %[[TMP12:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[DIV15:.+]] = sdiv i64 %[[TMP13]], 2 -// CHECK-NEXT: %[[MUL16:.+]] = mul nsw i64 %[[DIV15]], 2 -// CHECK-NEXT: %[[SUB17:.+]] = sub nsw i64 %[[TMP12]], %[[MUL16]] -// CHECK-NEXT: %[[MUL18:.+]] = mul nsw i64 %[[SUB17]], 4 -// CHECK-NEXT: %[[ADD19:.+]] = add nsw i64 0, %[[MUL18]] -// CHECK-NEXT: %[[CONV20:.+]] = trunc i64 %[[ADD19]] to i32 -// CHECK-NEXT: store i32 %[[CONV20]], i32* %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: %[[TMP14:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: store i32 %[[TMP14]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: br label %[[FOR_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: 
[[FOR_COND]]: -// CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 4 -// CHECK-NEXT: %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]] -// CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[LAND_RHS]]: -// CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8 -// CHECK-NEXT: br label %[[LAND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[LAND_END]]: -// CHECK-NEXT: %[[TMP18:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP24]], %[[LAND_RHS]] ] -// CHECK-NEXT: br i1 %[[TMP18]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_BODY]]: -// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[MUL26:.+]] = mul nsw i32 %[[TMP19]], 1 -// CHECK-NEXT: %[[ADD27:.+]] = add nsw i32 0, %[[MUL26]] -// CHECK-NEXT: store i32 %[[ADD27]], i32* %[[J]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM28:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX29:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM28]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX29]], align 4 -// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[J]], align 4 -// CHECK-NEXT: %[[IDXPROM30:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX31:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM30]] -// CHECK-NEXT: %[[TMP28:.+]] = load float, float* %[[ARRAYIDX31]], align 4 -// CHECK-NEXT: %[[MUL32:.+]] = fmul float %[[TMP25]], %[[TMP28]] -// CHECK-NEXT: %[[ADD33:.+]] = fadd float %[[TMP22]], %[[MUL32]] -// CHECK-NEXT: %[[TMP29:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP30:.+]] = load i32, i32* %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM34:.+]] = sext i32 %[[TMP30]] to i64 -// CHECK-NEXT: %[[ARRAYIDX35:.+]] = getelementptr inbounds float, float* %[[TMP29]], i64 %[[IDXPROM34]] -// CHECK-NEXT: %[[TMP31:.+]] = load float, float* %[[ARRAYIDX35]], align 4 -// CHECK-NEXT: %[[ADD36:.+]] = fadd float %[[TMP31]], %[[ADD33]] -// CHECK-NEXT: store float %[[ADD36]], float* %[[ARRAYIDX35]], align 4 -// CHECK-NEXT: br label %[[FOR_INC:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_INC]]: -// CHECK-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP32]], 1 -// CHECK-NEXT: store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_END]]: -// CHECK-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_BODY_CONTINUE]]: -// CHECK-NEXT: br label %[[OMP_INNER_FOR_INC:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_INC]]: 
-// CHECK-NEXT: %[[TMP33:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[ADD37:.+]] = add nsw i64 %[[TMP33]], 1 -// CHECK-NEXT: store i64 %[[ADD37]], i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_END]]: -// CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM38:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @5) -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM38]]) -// CHECK-NEXT: br label %[[OMP_PRECOND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_PRECOND_END]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM39:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @7) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @6, i32 %[[OMP_GLOBAL_THREAD_NUM39]]) -// CHECK-NEXT: ret void -// CHECK-NEXT: } -void unroll_partial_factor_for_collapse(int m, float *a, float *b, float *c, float *d) { -#pragma omp for collapse(2) - for (int i = 0; i < m; i++) { -#pragma omp unroll partial(4) - for (int j = 0; j < 8; j++) { - a[i] += b[i] + c[i] * d[j]; - } - } -} - -#endif // HEADER - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.mustprogress"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4} diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c deleted file mode 100644 index 15f4ced..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c +++ /dev/null @@ -1,152 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 -// CHECK-NEXT: call void 
@__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } -void unroll_partial_heuristic(float *a, float *b, float *c, float *d) { -#pragma omp unroll partial - for (int i = 0; i < 2; i++) { - a[i] = b[i] * c[i] * d[i]; - } -} - -#endif // HEADER - -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// 
CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c deleted file mode 100644 index 605c2be..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c +++ /dev/null @@ -1,243 +0,0 @@ -// NOTE: 
Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -// REQUIRES: x86-registered-target - -// TODO: The unroll-factor heuristic might be able to use the information that the trip count is constant, but currently is not able to determine that. - -#ifndef HEADER -#define HEADER - -double sind(double); - -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_constant_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[E_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store float* %[[E:.+]], float** %[[E_ADDR]], align 8 -// CHECK-NEXT: store float %[[OFFSET:.+]], float* %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* 
%[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 4 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 4, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[CONV:.+]] = fpext float %[[TMP19]] to double -// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double %[[CONV]]) -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[CONV4:.+]] = fpext float %[[TMP22]] to double -// CHECK-NEXT: %[[MUL:.+]] = fmul double %[[CALL]], %[[CONV4]] -// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM5:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX6:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM5]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, 
float* %[[ARRAYIDX6]], align 4 -// CHECK-NEXT: %[[CONV7:.+]] = fpext float %[[TMP25]] to double -// CHECK-NEXT: %[[MUL8:.+]] = fmul double %[[MUL]], %[[CONV7]] -// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[E_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM9:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX10:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM9]] -// CHECK-NEXT: %[[TMP28:.+]] = load float, float* %[[ARRAYIDX10]], align 4 -// CHECK-NEXT: %[[CONV11:.+]] = fpext float %[[TMP28]] to double -// CHECK-NEXT: %[[MUL12:.+]] = fmul double %[[MUL8]], %[[CONV11]] -// CHECK-NEXT: %[[TMP29:.+]] = load float, float* %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: %[[CONV13:.+]] = fpext float %[[TMP29]] to double -// CHECK-NEXT: %[[ADD:.+]] = fadd double %[[MUL12]], %[[CONV13]] -// CHECK-NEXT: %[[TMP30:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP31:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM14:.+]] = sext i32 %[[TMP31]] to i64 -// CHECK-NEXT: %[[ARRAYIDX15:.+]] = getelementptr inbounds float, float* %[[TMP30]], i64 %[[IDXPROM14]] -// CHECK-NEXT: %[[TMP32:.+]] = load float, float* %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: %[[CONV16:.+]] = fpext float %[[TMP32]] to double -// CHECK-NEXT: %[[ADD17:.+]] = fadd double %[[CONV16]], %[[ADD]] -// CHECK-NEXT: %[[CONV18:.+]] = fptrunc double %[[ADD17]] to float -// CHECK-NEXT: store float %[[CONV18]], float* %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM19:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM19]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } - -void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float *d, float *e, float offset) { -#pragma omp for -#pragma omp unroll partial - for (int i = 0; i < 128; i++) { - a[i] += sind(b[i]) * c[i] * d[i] * e[i] + offset; - } -} - -#endif // HEADER - -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], 
%struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 128, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4} diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c deleted file mode 100644 index d83a33b..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c +++ /dev/null @@ -1,225 +0,0 @@ -// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -// REQUIRES: x86-registered-target - -#ifndef HEADER -#define HEADER - -double sind(double); - -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[M_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[E_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4 -// CHECK-NEXT: %[[DOTOMP_IV:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[TMP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[TMP1:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[J:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLLED_IV_J:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTOMP_LB:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_UB:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[I6:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLLED_IV_J7:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLL_INNER_IV_J:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32 %[[M:.+]], i32* %[[M_ADDR]], align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store float* %[[E:.+]], float** %[[E_ADDR]], align 8 -// CHECK-NEXT: store float %[[OFFSET:.+]], float* %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = load i32, i32* %[[M_ADDR]], align 4 -// CHECK-NEXT: store i32 %[[TMP0]], i32* %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: store i32 0, i32* %[[J]], align 4 -// CHECK-NEXT: %[[TMP1_1:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP1_1]], 0 -// CHECK-NEXT: %[[DIV:.+]] = sdiv i32 %[[SUB]], 1 -// CHECK-NEXT: %[[CONV:.+]] = sext i32 %[[DIV]] to i64 -// CHECK-NEXT: %[[MUL:.+]] = mul nsw i64 %[[CONV]], 4 -// CHECK-NEXT: %[[SUB3:.+]] = sub nsw i64 %[[MUL]], 1 -// CHECK-NEXT: store i64 %[[SUB3]], i64* %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 0, i32* %[[DOTUNROLLED_IV_J]], align 4 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 0, %[[TMP2]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_PRECOND_THEN]]: -// CHECK-NEXT: store i64 0, i64* %[[DOTOMP_LB]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: store i64 %[[TMP3]], i64* %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: store i64 1, i64* %[[DOTOMP_STRIDE]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: 
%[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3) -// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK-NEXT: %[[TMP4:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[TMP5:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP8]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: %[[TMP7:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i64 [ %[[TMP6]], %[[COND_TRUE]] ], [ %[[TMP7]], %[[COND_FALSE]] ] -// CHECK-NEXT: store i64 %[[COND]], i64* %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[TMP8:.+]] = load i64, i64* %[[DOTOMP_LB]], align 8 -// CHECK-NEXT: store i64 %[[TMP8]], i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_COND]]: -// CHECK-NEXT: %[[TMP9:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[CMP10:.+]] = icmp sle i64 %[[TMP9]], %[[TMP10]] -// CHECK-NEXT: br i1 %[[CMP10]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_BODY]]: -// CHECK-NEXT: %[[TMP11:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[DIV12:.+]] = sdiv i64 %[[TMP11]], 4 -// CHECK-NEXT: %[[MUL13:.+]] = mul nsw i64 %[[DIV12]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add nsw i64 0, %[[MUL13]] -// CHECK-NEXT: %[[CONV14:.+]] = trunc i64 %[[ADD]] to i32 -// CHECK-NEXT: store i32 %[[CONV14]], i32* %[[I6]], align 4 -// CHECK-NEXT: %[[TMP12:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[DIV15:.+]] = sdiv i64 %[[TMP13]], 4 -// CHECK-NEXT: %[[MUL16:.+]] = mul nsw i64 %[[DIV15]], 4 -// CHECK-NEXT: %[[SUB17:.+]] = sub nsw i64 %[[TMP12]], %[[MUL16]] -// CHECK-NEXT: %[[MUL18:.+]] = mul nsw i64 %[[SUB17]], 2 -// CHECK-NEXT: %[[ADD19:.+]] = add nsw i64 0, %[[MUL18]] -// CHECK-NEXT: %[[CONV20:.+]] = trunc i64 %[[ADD19]] to i32 -// CHECK-NEXT: store i32 %[[CONV20]], i32* %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: %[[TMP14:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: store i32 %[[TMP14]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: br label %[[FOR_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_COND]]: -// CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 2 -// CHECK-NEXT: %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]] -// CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[LAND_RHS]]: -// CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8 -// CHECK-NEXT: br label %[[LAND_END]] -// CHECK-EMPTY: -// 
CHECK-NEXT: [[LAND_END]]: -// CHECK-NEXT: %[[TMP18:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP24]], %[[LAND_RHS]] ] -// CHECK-NEXT: br i1 %[[TMP18]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_BODY]]: -// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[MUL26:.+]] = mul nsw i32 %[[TMP19]], 1 -// CHECK-NEXT: %[[ADD27:.+]] = add nsw i32 0, %[[MUL26]] -// CHECK-NEXT: store i32 %[[ADD27]], i32* %[[J]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[CONV28:.+]] = fpext float %[[TMP22]] to double -// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double %[[CONV28]]) -// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM29:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX30:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM29]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX30]], align 4 -// CHECK-NEXT: %[[CONV31:.+]] = fpext float %[[TMP25]] to double -// CHECK-NEXT: %[[MUL32:.+]] = fmul double %[[CALL]], %[[CONV31]] -// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM33:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX34:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM33]] -// CHECK-NEXT: %[[TMP28:.+]] = load float, float* %[[ARRAYIDX34]], align 4 -// CHECK-NEXT: %[[CONV35:.+]] = fpext float %[[TMP28]] to double -// CHECK-NEXT: %[[MUL36:.+]] = fmul double %[[MUL32]], %[[CONV35]] -// CHECK-NEXT: %[[TMP29:.+]] = load float*, float** %[[E_ADDR]], align 8 -// CHECK-NEXT: %[[TMP30:.+]] = load i32, i32* %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM37:.+]] = sext i32 %[[TMP30]] to i64 -// CHECK-NEXT: %[[ARRAYIDX38:.+]] = getelementptr inbounds float, float* %[[TMP29]], i64 %[[IDXPROM37]] -// CHECK-NEXT: %[[TMP31:.+]] = load float, float* %[[ARRAYIDX38]], align 4 -// CHECK-NEXT: %[[CONV39:.+]] = fpext float %[[TMP31]] to double -// CHECK-NEXT: %[[MUL40:.+]] = fmul double %[[MUL36]], %[[CONV39]] -// CHECK-NEXT: %[[TMP32:.+]] = load float, float* %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: %[[CONV41:.+]] = fpext float %[[TMP32]] to double -// CHECK-NEXT: %[[ADD42:.+]] = fadd double %[[MUL40]], %[[CONV41]] -// CHECK-NEXT: %[[TMP33:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP34:.+]] = load i32, i32* %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM43:.+]] = sext i32 %[[TMP34]] to i64 -// CHECK-NEXT: %[[ARRAYIDX44:.+]] = getelementptr inbounds float, float* %[[TMP33]], i64 %[[IDXPROM43]] -// CHECK-NEXT: %[[TMP35:.+]] = load float, float* %[[ARRAYIDX44]], align 4 -// CHECK-NEXT: %[[CONV45:.+]] = fpext float %[[TMP35]] to double -// CHECK-NEXT: %[[ADD46:.+]] = fadd double %[[CONV45]], %[[ADD42]] -// CHECK-NEXT: %[[CONV47:.+]] = fptrunc double %[[ADD46]] to float -// CHECK-NEXT: store float %[[CONV47]], float* %[[ARRAYIDX44]], align 4 -// CHECK-NEXT: br label %[[FOR_INC:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_INC]]: -// CHECK-NEXT: %[[TMP36:.+]] = load 
i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP36]], 1 -// CHECK-NEXT: store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_END]]: -// CHECK-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_BODY_CONTINUE]]: -// CHECK-NEXT: br label %[[OMP_INNER_FOR_INC:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_INC]]: -// CHECK-NEXT: %[[TMP37:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[ADD48:.+]] = add nsw i64 %[[TMP37]], 1 -// CHECK-NEXT: store i64 %[[ADD48]], i64* %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_END]]: -// CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM49:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @5) -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK-NEXT: br label %[[OMP_PRECOND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_PRECOND_END]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM50:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @7) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @6, i32 %[[OMP_GLOBAL_THREAD_NUM50]]) -// CHECK-NEXT: ret void -// CHECK-NEXT: } - -void unroll_partial_heuristic_for(int m, float *a, float *b, float *c, float *d, float *e, float offset) { -#pragma omp for collapse(2) - for (int i = 0; i < m; i++) { -#pragma omp unroll partial - for (int j = 0; j < 8; j++) { - a[i] += sind(b[i]) * c[i] * d[i] * e[i] + offset; - } - } -} - -#endif // HEADER - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.mustprogress"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c deleted file mode 100644 index 07a1b59..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c +++ /dev/null @@ -1,248 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -// REQUIRES: x86-registered-target - -#ifndef HEADER -#define HEADER - -double sind(double); - -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_runtime_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[N_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[E_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// 
CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32 %[[N:.+]], i32* %[[N_ADDR]], align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store float* %[[E:.+]], float** %[[E_ADDR]], align 8 -// CHECK-NEXT: store float %[[OFFSET:.+]], float* %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 1 -// CHECK-NEXT: store i32* %[[N_ADDR]], i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP2:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[TMP2]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5]], 0 -// CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP4]], %[[TMP7]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP8]], i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]] -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[TMP11]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP12]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// 
CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP9]] -// CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[TMP13]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14]], i32 %[[TMP5]], i32 4 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP15]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP16:.+]] = mul nuw i32 4, %[[TMP13]] -// CHECK-NEXT: %[[TMP17:.+]] = add nuw i32 %[[TMP16]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP17]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP18:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP19]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP18]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP20:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[CONV:.+]] = fpext float %[[TMP20]] to double -// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double %[[CONV]]) -// CHECK-NEXT: %[[TMP21:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP22:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP22]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP21]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP23:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[CONV4:.+]] = fpext float %[[TMP23]] to double -// CHECK-NEXT: %[[MUL:.+]] = fmul double %[[CALL]], %[[CONV4]] -// CHECK-NEXT: %[[TMP24:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP25:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM5:.+]] = sext i32 %[[TMP25]] to i64 -// CHECK-NEXT: %[[ARRAYIDX6:.+]] = getelementptr inbounds float, float* %[[TMP24]], i64 %[[IDXPROM5]] -// CHECK-NEXT: %[[TMP26:.+]] = load float, float* %[[ARRAYIDX6]], align 4 -// CHECK-NEXT: %[[CONV7:.+]] = fpext float %[[TMP26]] to double -// CHECK-NEXT: %[[MUL8:.+]] = fmul double %[[MUL]], %[[CONV7]] -// CHECK-NEXT: %[[TMP27:.+]] = load float*, float** %[[E_ADDR]], align 8 -// CHECK-NEXT: %[[TMP28:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM9:.+]] = sext i32 %[[TMP28]] to i64 -// CHECK-NEXT: %[[ARRAYIDX10:.+]] = getelementptr inbounds float, float* %[[TMP27]], i64 %[[IDXPROM9]] -// CHECK-NEXT: %[[TMP29:.+]] = load float, float* %[[ARRAYIDX10]], align 4 -// CHECK-NEXT: %[[CONV11:.+]] = fpext float %[[TMP29]] to double -// CHECK-NEXT: %[[MUL12:.+]] = fmul double %[[MUL8]], %[[CONV11]] -// CHECK-NEXT: %[[TMP30:.+]] = load float, float* %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: %[[CONV13:.+]] = fpext float %[[TMP30]] to double -// CHECK-NEXT: %[[ADD:.+]] = fadd double %[[MUL12]], %[[CONV13]] -// CHECK-NEXT: %[[TMP31:.+]] = load float*, float** %[[A_ADDR]], 
align 8 -// CHECK-NEXT: %[[TMP32:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM14:.+]] = sext i32 %[[TMP32]] to i64 -// CHECK-NEXT: %[[ARRAYIDX15:.+]] = getelementptr inbounds float, float* %[[TMP31]], i64 %[[IDXPROM14]] -// CHECK-NEXT: %[[TMP33:.+]] = load float, float* %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: %[[CONV16:.+]] = fpext float %[[TMP33]] to double -// CHECK-NEXT: %[[ADD17:.+]] = fadd double %[[CONV16]], %[[ADD]] -// CHECK-NEXT: %[[CONV18:.+]] = fptrunc double %[[ADD17]] to float -// CHECK-NEXT: store float %[[CONV18]], float* %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM19:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM19]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } - -void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, float *d, float *e, float offset) { -#pragma omp for -#pragma omp unroll partial - for (int i = 0; i < n; i++) { - a[i] += sind(b[i]) * c[i] * d[i] * e[i] + offset; - } -} - -#endif // HEADER - -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 1 -// CHECK-NEXT: %[[TMP5:.+]] = load i32*, i32** %[[TMP4]], align 8 -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[TMP5]], align 4 -// CHECK-NEXT: store i32 %[[TMP6]], i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: 
%[[CMP:.+]] = icmp slt i32 %[[TMP7]], %[[TMP8]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]] -// CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP12]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4} diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c deleted file mode 100644 index 5bb8911..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ /dev/null @@ -1,215 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 
-// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 2 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: 
[[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 2, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP19]], %[[TMP22]] -// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP25]] -// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// 
CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } - -void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { -#pragma omp for -#pragma omp unroll partial(2) - for (int i = 0; i < 2; i++) { - a[i] = b[i] * c[i] * d[i]; - } -} - -#endif // HEADER - -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, 
%[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c deleted file mode 100644 index ebc3df4..0000000 --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c +++ /dev/null @@ -1,197 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -// CHECK-LABEL: define {{.*}}@unroll_unroll_partial_heuristic( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 8 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 8 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label 
%[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP7:.+]] = icmp eq i32 %[[OMP_FLOOR0_IV]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP8:.+]] = select i1 %[[TMP7]], i32 %[[TMP4]], i32 8 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP8]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP9:.+]] = mul nuw i32 8, %[[OMP_FLOOR0_IV]] -// CHECK-NEXT: %[[TMP10:.+]] = add nuw i32 %[[TMP9]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP10]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP11:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP12:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP12]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP11]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP13:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP14:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP15]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP14]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP16:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP13]], %[[TMP16]] -// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP19]] -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: 
-// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]], !llvm.loop ![[LOOP6:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } -void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { -#pragma omp unroll partial -#pragma omp unroll partial - for (int i = 0; i < 2; i++) { - a[i] = b[i] * c[i] * d[i]; - } -} - -#endif // HEADER - -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** 
%[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } - - -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 8} -// CHECK: ![[LOOP6]] = distinct !{![[LOOP6]], ![[LOOPPROP4]]} diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 8625f75..5983f98 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -1305,10 +1305,6 @@ bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name); llvm::Optional getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name); -/// Find named metadata for a loop with an integer value. Return \p Default if -/// not set. -int getIntLoopAttribute(const Loop *TheLoop, StringRef Name, int Default = 0); - /// Find string metadata for loop /// /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index c41bdd0..b982e5b 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -475,48 +475,6 @@ public: tileLoops(DebugLoc DL, ArrayRef Loops, ArrayRef TileSizes); - /// Fully unroll a loop. - /// - /// Instead of unrolling the loop immediately (and duplicating its body - /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop - /// metadata. - /// - /// \param DL Debug location for instructions added by unrolling. - /// \param Loop The loop to unroll. The loop will be invalidated. - void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop); - - /// Fully or partially unroll a loop. How the loop is unrolled is determined - /// using LLVM's LoopUnrollPass. - /// - /// \param DL Debug location for instructions added by unrolling. - /// \param Loop The loop to unroll. The loop will be invalidated. - void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop); - - /// Partially unroll a loop. - /// - /// The CanonicalLoopInfo of the unrolled loop for use with chained - /// loop-associated directive can be requested using \p UnrolledCLI. Not - /// needing the CanonicalLoopInfo allows more efficient code generation by - /// deferring the actual unrolling to the LoopUnrollPass using loop metadata. - /// A loop-associated directive applied to the unrolled loop needs to know the - /// new trip count which means that if using a heuristically determined unroll - /// factor (\p Factor == 0), that factor must be computed immediately. We are - /// using the same logic as the LoopUnrollPass to derived the unroll factor, - /// but which assumes that some canonicalization has taken place (e.g. - /// Mem2Reg, LICM, GVN, Inlining, etc.). 
That is, the heuristic will perform - /// better when the unrolled loop's CanonicalLoopInfo is not needed. - /// - /// \param DL Debug location for instructions added by unrolling. - /// \param Loop The loop to unroll. The loop will be invalidated. - /// \param Factor The factor to unroll the loop by. A factor of 0 - /// indicates that a heuristic should be used to determine - /// the unroll-factor. - /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the - /// partially unrolled loop. Otherwise, uses loop metadata - /// to defer unrolling to the LoopUnrollPass. - void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, - CanonicalLoopInfo **UnrolledCLI); - /// Generator for '#omp flush' /// /// \param Loc The location where the flush directive was encountered diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 081578e..66aab4c 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -1102,11 +1102,6 @@ llvm::Optional llvm::getOptionalIntLoopAttribute(const Loop *TheLoop, return IntMD->getSExtValue(); } -int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name, - int Default) { - return getOptionalIntLoopAttribute(TheLoop, Name).getValueOr(Default); -} - static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress"; bool llvm::hasMustProgress(const Loop *L) { diff --git a/llvm/lib/Frontend/OpenMP/CMakeLists.txt b/llvm/lib/Frontend/OpenMP/CMakeLists.txt index b026e5c..17ca2c6 100644 --- a/llvm/lib/Frontend/OpenMP/CMakeLists.txt +++ b/llvm/lib/Frontend/OpenMP/CMakeLists.txt @@ -14,6 +14,5 @@ add_llvm_component_library(LLVMFrontendOpenMP LINK_COMPONENTS Core Support - Passes TransformUtils ) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 7724fd8..29fe2a8 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -16,23 +16,15 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Value.h" -#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/Transforms/Utils/LoopPeel.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" #include @@ -47,12 +39,6 @@ static cl::opt "'as-if' properties of runtime calls."), cl::init(false)); -static cl::opt UnrollThresholdFactor( - "openmp-ir-builder-unroll-threshold-factor", cl::Hidden, - cl::desc("Factor for the unroll threshold to account for code " - "simplifications still taking place"), - cl::init(1.5)); - void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { LLVMContext &Ctx = Fn.getContext(); @@ -2070,281 +2056,6 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef Loops, return Result; } -/// Attach loop metadata \p Properties to the loop described by \p Loop. If the -/// loop already has metadata, the loop properties are appended. 
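For orientation, the unroll requests produced by these helpers are ordinary `llvm.loop` metadata attached to the terminator of the loop latch, which is exactly what the `![[LOOP3]]`/`llvm.loop.unroll.*` CHECK lines in the removed tests verify. A minimal, self-contained sketch of producing the same encoding with the core IR API (the function name, `Latch`, and `Factor` are illustrative and not part of the reverted interface):

```
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Mark the loop whose unique latch is `Latch` for partial unrolling by `Factor`.
// This matches the !llvm.loop encoding the removed tests check for.
static void requestPartialUnroll(BasicBlock *Latch, unsigned Factor) {
  LLVMContext &Ctx = Latch->getContext();
  Metadata *Count = ConstantAsMetadata::get(
      ConstantInt::get(Type::getInt32Ty(Ctx), Factor));
  MDNode *LoopID = MDNode::getDistinct(
      Ctx, {nullptr, // placeholder for the self-reference
            MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
            MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"),
                              Count})});
  LoopID->replaceOperandWith(0, LoopID); // loop IDs reference themselves
  Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
}
```

The reverted addLoopMetadata helper below does essentially this, but additionally preserves any loop metadata already present on the latch, as its comment describes.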
-static void addLoopMetadata(CanonicalLoopInfo *Loop, - ArrayRef Properties) { - assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo"); - - // Nothing to do if no property to attach. - if (Properties.empty()) - return; - - LLVMContext &Ctx = Loop->getFunction()->getContext(); - SmallVector NewLoopProperties; - NewLoopProperties.push_back(nullptr); - - // If the loop already has metadata, prepend it to the new metadata. - BasicBlock *Latch = Loop->getLatch(); - assert(Latch && "A valid CanonicalLoopInfo must have a unique latch"); - MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop); - if (Existing) - append_range(NewLoopProperties, drop_begin(Existing->operands(), 1)); - - append_range(NewLoopProperties, Properties); - MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties); - LoopID->replaceOperandWith(0, LoopID); - - Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID); -} - -void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) { - LLVMContext &Ctx = Builder.getContext(); - addLoopMetadata( - Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")), - MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))}); -} - -void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) { - LLVMContext &Ctx = Builder.getContext(); - addLoopMetadata( - Loop, { - MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")), - }); -} - -/// Create the TargetMachine object to query the backend for optimization -/// preferences. -/// -/// Ideally, this would be passed from the front-end to the OpenMPBuilder, but -/// e.g. Clang does not pass it to its CodeGen layer and creates it only when -/// needed for the LLVM pass pipline. We use some default options to avoid -/// having to pass too many settings from the frontend that probably do not -/// matter. -/// -/// Currently, TargetMachine is only used sometimes by the unrollLoopPartial -/// method. If we are going to use TargetMachine for more purposes, especially -/// those that are sensitive to TargetOptions, RelocModel and CodeModel, it -/// might become be worth requiring front-ends to pass on their TargetMachine, -/// or at least cache it between methods. Note that while fontends such as Clang -/// have just a single main TargetMachine per translation unit, "target-cpu" and -/// "target-features" that determine the TargetMachine are per-function and can -/// be overrided using __attribute__((target("OPTIONS"))). -static std::unique_ptr -createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) { - Module *M = F->getParent(); - - StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString(); - StringRef Features = F->getFnAttribute("target-features").getValueAsString(); - const std::string &Triple = M->getTargetTriple(); - - std::string Error; - const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); - if (!TheTarget) - return {}; - - llvm::TargetOptions Options; - return std::unique_ptr(TheTarget->createTargetMachine( - Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None, - OptLevel)); -} - -/// Heuristically determine the best-performant unroll factor for \p CLI. This -/// depends on the target processor. We are re-using the same heuristics as the -/// LoopUnrollPass. 
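Because the heuristic described above is invoked from the frontend rather than from inside a pass pipeline, it has to set up the analyses it consumes by hand. A condensed sketch of that setup (names are illustrative; the reverted implementation below additionally creates a TargetMachine so the cost model can be target-specific, then feeds these results into gatherUnrollingPreferences and computeUnrollCount):

```
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

// Run the standard function analyses ad hoc for a single function, the way the
// reverted heuristic does, instead of going through a pass pipeline.
static void queryLoopAnalyses(Function &F) {
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM); // make the default function analyses available

  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F); // cost model
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);                 // loop structure
  ScalarEvolution &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);

  (void)TTI; (void)LI; (void)SE; // consumed by the unrolling heuristics
}
```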
-static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) { - Function *F = CLI->getFunction(); - - // Assume the user requests the most aggressive unrolling, even if the rest of - // the code is optimized using a lower setting. - CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive; - std::unique_ptr TM = createTargetMachine(F, OptLevel); - - llvm::PassBuilder PB; - FunctionAnalysisManager FAM; - PB.registerFunctionAnalyses(FAM); - TargetIRAnalysis TIRA; - if (TM) - TIRA = TargetIRAnalysis( - [&](const Function &F) { return TM->getTargetTransformInfo(F); }); - TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM); - ScalarEvolutionAnalysis SEA; - ScalarEvolution &&SE = SEA.run(*F, FAM); - DominatorTreeAnalysis DTA; - DominatorTree &&DT = DTA.run(*F, FAM); - LoopAnalysis LIA; - LoopInfo &&LI = LIA.run(*F, FAM); - AssumptionAnalysis ACT; - AssumptionCache &&AC = ACT.run(*F, FAM); - OptimizationRemarkEmitter ORE{F}; - - Loop *L = LI.getLoopFor(CLI->getHeader()); - assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop"); - - TargetTransformInfo::UnrollingPreferences UP = - gatherUnrollingPreferences(L, SE, TTI, - /*BlockFrequencyInfo=*/nullptr, - /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel, - /*UserThreshold=*/None, - /*UserCount=*/None, - /*UserAllowPartial=*/true, - /*UserAllowRuntime=*/true, - /*UserUpperBound=*/None, - /*UserFullUnrollMaxCount=*/None); - - UP.Force = true; - - // Account for additional optimizations taking place before the LoopUnrollPass - // would unroll the loop. - UP.Threshold *= UnrollThresholdFactor; - UP.PartialThreshold *= UnrollThresholdFactor; - - // Use normal unroll factors even if the rest of the code is optimized for - // size. - UP.OptSizeThreshold = UP.Threshold; - UP.PartialOptSizeThreshold = UP.PartialThreshold; - - LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n" - << " Threshold=" << UP.Threshold << "\n" - << " PartialThreshold=" << UP.PartialThreshold << "\n" - << " OptSizeThreshold=" << UP.OptSizeThreshold << "\n" - << " PartialOptSizeThreshold=" - << UP.PartialOptSizeThreshold << "\n"); - - // Disable peeling. - TargetTransformInfo::PeelingPreferences PP = - gatherPeelingPreferences(L, SE, TTI, - /*UserAllowPeeling=*/false, - /*UserAllowProfileBasedPeeling=*/false, - /*UserUnrollingSpecficValues=*/false); - - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(L, &AC, EphValues); - - // Assume that reads and writes to stack variables can be eliminated by - // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's - // size. - for (BasicBlock *BB : L->blocks()) { - for (Instruction &I : *BB) { - Value *Ptr; - if (auto *Load = dyn_cast(&I)) { - Ptr = Load->getPointerOperand(); - } else if (auto *Store = dyn_cast(&I)) { - Ptr = Store->getPointerOperand(); - } else - continue; - - Ptr = Ptr->stripPointerCasts(); - - if (auto *Alloca = dyn_cast(Ptr)) { - if (Alloca->getParent() == &F->getEntryBlock()) - EphValues.insert(&I); - } - } - } - - unsigned NumInlineCandidates; - bool NotDuplicatable; - bool Convergent; - unsigned LoopSize = - ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent, - TTI, EphValues, UP.BEInsns); - LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n"); - - // Loop is not unrollable if the loop contains certain instructions. - if (NotDuplicatable || Convergent) { - LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n"); - return 1; - } - - // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might - // be able to use it. 
- int TripCount = 0; - int MaxTripCount = 0; - bool MaxOrZero = false; - unsigned TripMultiple = 0; - - bool UseUpperBound = false; - computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount, - MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP, - UseUpperBound); - unsigned Factor = UP.Count; - LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n"); - - // This function returns 1 to signal to not unroll a loop. - if (Factor == 0) - return 1; - return Factor; -} - -void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, - int32_t Factor, - CanonicalLoopInfo **UnrolledCLI) { - assert(Factor >= 0 && "Unroll factor must not be negative"); - - Function *F = Loop->getFunction(); - LLVMContext &Ctx = F->getContext(); - - // If the unrolled loop is not used for another loop-associated directive, it - // is sufficient to add metadata for the LoopUnrollPass. - if (!UnrolledCLI) { - SmallVector LoopMetadata; - LoopMetadata.push_back( - MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable"))); - - if (Factor >= 1) { - ConstantAsMetadata *FactorConst = ConstantAsMetadata::get( - ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor))); - LoopMetadata.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})); - } - - addLoopMetadata(Loop, LoopMetadata); - return; - } - - // Heuristically determine the unroll factor. - if (Factor == 0) - Factor = computeHeuristicUnrollFactor(Loop); - - // No change required with unroll factor 1. - if (Factor == 1) { - *UnrolledCLI = Loop; - return; - } - - assert(Factor >= 2 && - "unrolling only makes sense with a factor of 2 or larger"); - - Type *IndVarTy = Loop->getIndVarType(); - - // Apply partial unrolling by tiling the loop by the unroll-factor, then fully - // unroll the inner loop. - Value *FactorVal = - ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor, - /*isSigned=*/false)); - std::vector LoopNest = - tileLoops(DL, {Loop}, {FactorVal}); - assert(LoopNest.size() == 2 && "Expect 2 loops after tiling"); - *UnrolledCLI = LoopNest[0]; - CanonicalLoopInfo *InnerLoop = LoopNest[1]; - - // LoopUnrollPass can only fully unroll loops with constant trip count. - // Unroll by the unroll factor with a fallback epilog for the remainder - // iterations if necessary. - ConstantAsMetadata *FactorConst = ConstantAsMetadata::get( - ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor))); - addLoopMetadata( - InnerLoop, - {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")), - MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})}); - -#ifndef NDEBUG - (*UnrolledCLI)->assertOK(); -#endif -} - OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 40d0844..9c9893c 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -15,7 +15,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" -#include "llvm/Passes/PassBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "gtest/gtest.h" @@ -143,40 +142,6 @@ protected: M.reset(); } - /// Create a function with a simple loop that calls printf using the logical - /// loop counter for use with tests that need a CanonicalLoopInfo object. 
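As a rough mental model of the reverted unrollLoopPartial above when the caller requests a CanonicalLoopInfo: the loop is tiled by the unroll factor, the outer (floor) loop is returned through UnrolledCLI, and the inner (tile) loop is tagged with llvm.loop.unroll metadata so the LoopUnrollPass expands it later. The resulting structure corresponds roughly to the following sketch (all names here are placeholders for illustration, not part of the reverted API):

```
#include <algorithm>
#include <cstdio>

// Illustrative only: the control-flow shape after unrollLoopPartial with a
// factor > 1, before the LoopUnrollPass expands the inner (tile) loop.
void partialUnrollShape(unsigned TripCount, unsigned Factor) {
  unsigned Floors = (TripCount + Factor - 1) / Factor;   // outer trip count
  for (unsigned Floor = 0; Floor < Floors; ++Floor) {    // returned as *UnrolledCLI
    unsigned Tiles = std::min(Factor, TripCount - Floor * Factor); // epilog handling
    for (unsigned Tile = 0; Tile < Tiles; ++Tile)        // carries llvm.loop.unroll.*
      std::printf("%u\n", Floor * Factor + Tile);        // original body, logical IV
  }
}
```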
- CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL, - OpenMPIRBuilder &OMPBuilder, - Instruction **Call = nullptr, - BasicBlock **BodyCode = nullptr) { - OMPBuilder.initialize(); - F->setName("func"); - - IRBuilder<> Builder(BB); - OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); - Value *TripCount = F->getArg(0); - - auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP, - llvm::Value *LC) { - Builder.restoreIP(CodeGenIP); - if (BodyCode) - *BodyCode = Builder.GetInsertBlock(); - - // Add something that consumes the induction variable to the body. - CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC}); - if (Call) - *Call = CallInst; - }; - CanonicalLoopInfo *Loop = - OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount); - - // Finalize the function. - Builder.restoreIP(Loop->getAfterIP()); - Builder.CreateRetVoid(); - - return Loop; - } - LLVMContext Ctx; std::unique_ptr M; Function *F; @@ -1323,11 +1288,30 @@ TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) { } TEST_F(OpenMPIRBuilderTest, TileSingleLoop) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); - Instruction *Call; - BasicBlock *BodyCode; + OMPBuilder.initialize(); + F->setName("func"); + + IRBuilder<> Builder(BB); + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + Value *TripCount = F->getArg(0); + + BasicBlock *BodyCode = nullptr; + Instruction *Call = nullptr; + auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { + Builder.restoreIP(CodeGenIP); + BodyCode = Builder.GetInsertBlock(); + + // Add something that consumes the induction variable to the body. + Call = createPrintfCall(Builder, "%d\\n", {LC}); + }; CanonicalLoopInfo *Loop = - buildSingleLoopFunction(DL, OMPBuilder, &Call, &BodyCode); + OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount); + + // Finalize the function. + Builder.restoreIP(Loop->getAfterIP()); + Builder.CreateRetVoid(); Instruction *OrigIndVar = Loop->getIndVar(); EXPECT_EQ(Call->getOperand(1), OrigIndVar); @@ -1664,86 +1648,6 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) { EXPECT_FALSE(verifyModule(*M, &errs())); } -TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) { - OpenMPIRBuilder OMPBuilder(*M); - - CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder); - - // Unroll the loop. - OMPBuilder.unrollLoopFull(DL, CLI); - - OMPBuilder.finalize(); - EXPECT_FALSE(verifyModule(*M, &errs())); - - PassBuilder PB; - FunctionAnalysisManager FAM; - PB.registerFunctionAnalyses(FAM); - LoopInfo &LI = FAM.getResult(*F); - - const std::vector &TopLvl = LI.getTopLevelLoops(); - EXPECT_EQ(TopLvl.size(), 1u); - - Loop *L = TopLvl.front(); - EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")); - EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full")); -} - -TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) { - OpenMPIRBuilder OMPBuilder(*M); - CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder); - - // Unroll the loop. 
- CanonicalLoopInfo *UnrolledLoop = nullptr; - OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop); - ASSERT_NE(UnrolledLoop, nullptr); - - OMPBuilder.finalize(); - EXPECT_FALSE(verifyModule(*M, &errs())); - UnrolledLoop->assertOK(); - - PassBuilder PB; - FunctionAnalysisManager FAM; - PB.registerFunctionAnalyses(FAM); - LoopInfo &LI = FAM.getResult(*F); - - const std::vector &TopLvl = LI.getTopLevelLoops(); - EXPECT_EQ(TopLvl.size(), 1u); - Loop *Outer = TopLvl.front(); - EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader()); - EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch()); - EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond()); - EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit()); - - EXPECT_EQ(Outer->getSubLoops().size(), 1u); - Loop *Inner = Outer->getSubLoops().front(); - - EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable")); - EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5); -} - -TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) { - OpenMPIRBuilder OMPBuilder(*M); - - CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder); - - // Unroll the loop. - OMPBuilder.unrollLoopHeuristic(DL, CLI); - - OMPBuilder.finalize(); - EXPECT_FALSE(verifyModule(*M, &errs())); - - PassBuilder PB; - FunctionAnalysisManager FAM; - PB.registerFunctionAnalyses(FAM); - LoopInfo &LI = FAM.getResult(*F); - - const std::vector &TopLvl = LI.getTopLevelLoops(); - EXPECT_EQ(TopLvl.size(), 1u); - - Loop *L = TopLvl.front(); - EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")); -} - TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M);