From 3e7415a0ff30f7d310af4f8ee24269aa450889ba Mon Sep 17 00:00:00 2001
From: Peixin-Qiao
Date: Fri, 1 Apr 2022 16:17:29 +0800
Subject: [PATCH] [OMPIRBuilder] Support ordered clause specified without parameter

This patch adds support for the ordered clause specified without a
parameter on the worksharing-loop directive, both in the OpenMPIRBuilder
and when lowering MLIR to LLVM IR.

Reviewed By: Meinersbur

Differential Revision: https://reviews.llvm.org/D114940
---
 llvm/include/llvm/Frontend/OpenMP/OMPConstants.h   |  12 +-
 llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h   |   5 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp          |  25 ++-
 llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp    |  85 +++++++++-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td      |   7 +-
 .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp   |  69 ++++++--
 mlir/test/Target/LLVMIR/openmp-llvm.mlir           | 179 ++++++++++++++++++++-
 7 files changed, 358 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index 8495459..d4757f6 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -78,8 +78,6 @@ enum class IdentFlag {
 enum class OMPScheduleType {
   StaticChunked = 33,
   Static = 34, // static unspecialized
-  DistributeChunked = 91,
-  Distribute = 92,
   DynamicChunked = 35,
   GuidedChunked = 36, // guided unspecialized
   Runtime = 37,
@@ -89,6 +87,16 @@ enum class OMPScheduleType {
   GuidedSimd = 46,  // guided with chunk adjustment
   RuntimeSimd = 47, // runtime with chunk adjustment
 
+  OrderedStaticChunked = 65,
+  OrderedStatic = 66, // ordered static unspecialized
+  OrderedDynamicChunked = 67,
+  OrderedGuidedChunked = 68,
+  OrderedRuntime = 69,
+  OrderedAuto = 70, // ordered auto
+
+  DistributeChunked = 91, // distribute static chunked
+  Distribute = 92,        // distribute static unspecialized
+
   ModifierMonotonic =
       (1 << 29), // Set if the monotonic schedule modifier was present
   ModifierNonmonotonic =
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 290748d..d770651 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -403,13 +403,16 @@ public:
   ///                    the loop.
   /// \param Chunk       The size of loop chunk considered as a unit when
   ///                    scheduling. If \p nullptr, defaults to 1.
+  /// \param Ordered     Indicates whether the ordered clause is specified
+  ///                    without a parameter.
   ///
   /// \returns Point where to insert code after the workshare construct.
   InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                           InsertPointTy AllocaIP,
                                           omp::OMPScheduleType SchedType,
                                           bool NeedsBarrier,
-                                          Value *Chunk = nullptr);
+                                          Value *Chunk = nullptr,
+                                          bool Ordered = false);
 
   /// Modifies the canonical loop to be a workshare loop.
   ///
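For orientation, a minimal sketch of driving the extended entry point, mirroring
the unit test added later in this patch (the setup of M, Loc, DL, AllocaIP,
LoopBodyGen and the bound/chunk values is assumed, as in that test):

    OpenMPIRBuilder OMPBuilder(M);
    OMPBuilder.initialize();
    CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
        Loc, LoopBodyGen, StartVal, StopVal, StepVal,
        /*IsSigned=*/false, /*InclusiveStop=*/false);
    // Passing Ordered=true makes the builder emit __kmpc_dispatch_fini_* in
    // the loop latch (see the OMPIRBuilder.cpp change below).
    OMPBuilder.applyDynamicWorkshareLoop(
        DL, CLI, AllocaIP, omp::OMPScheduleType::OrderedStaticChunked,
        /*NeedsBarrier=*/true, /*Chunk=*/ChunkVal, /*Ordered=*/true);
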
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 3cde264..0a9dfde 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1903,9 +1903,24 @@ getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
   llvm_unreachable("unknown OpenMP loop iterator bitwidth");
 }
 
+/// Returns an LLVM function to call for finalizing the dynamic loop,
+/// depending on `type`. Only i32 and i64 are supported by the runtime. Always
+/// interpret integers as unsigned similarly to CanonicalLoopInfo.
+static FunctionCallee
+getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
+  unsigned Bitwidth = Ty->getIntegerBitWidth();
+  if (Bitwidth == 32)
+    return OMPBuilder.getOrCreateRuntimeFunction(
+        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
+  if (Bitwidth == 64)
+    return OMPBuilder.getOrCreateRuntimeFunction(
+        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
+  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
     DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
-    OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
+    OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk, bool Ordered) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
   assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
          "Require dedicated allocate IP");
@@ -1946,6 +1961,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
   BasicBlock *Header = CLI->getHeader();
   BasicBlock *Exit = CLI->getExit();
   BasicBlock *Cond = CLI->getCond();
+  BasicBlock *Latch = CLI->getLatch();
   InsertPointTy AfterIP = CLI->getAfterIP();
 
   // The CLI will be "broken" in the code below, as the loop is no longer
@@ -2005,6 +2021,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
   assert(BI->getSuccessor(1) == Exit);
   BI->setSuccessor(1, OuterCond);
 
+  // Call the "fini" function if the "ordered" clause is present in the wsloop
+  // directive.
+  if (Ordered) {
+    Builder.SetInsertPoint(&Latch->back());
+    FunctionCallee DynamicFini = getKmpcForDynamicFiniForType(IVTy, M, *this);
+    Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
+  }
+
   // Add the barrier if requested.
   if (NeedsBarrier) {
     Builder.SetInsertPoint(&Exit->back());
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 3cffabe..6a58702 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -2089,11 +2089,13 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   InsertPointTy AfterIP = CLI->getAfterIP();
   BasicBlock *Preheader = CLI->getPreheader();
   BasicBlock *ExitBlock = CLI->getExit();
+  BasicBlock *LatchBlock = CLI->getLatch();
   Value *IV = CLI->getIndVar();
 
   InsertPointTy EndIP =
       OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
-                                           /*NeedsBarrier=*/true, ChunkVal);
+                                           /*NeedsBarrier=*/true, ChunkVal,
+                                           /*Ordered=*/false);
   // The returned value should be the "after" point.
   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
@@ -2146,6 +2148,10 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   EXPECT_EQ(OrigUpperBound->getValue(), 21);
   EXPECT_EQ(OrigStride->getValue(), 1);
 
+  CallInst *FiniCall = dyn_cast<CallInst>(
+      &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
+  EXPECT_EQ(FiniCall, nullptr);
+
   // The original loop iterator should only be used in the condition, in the
   // increment and in the statement that adds the lower bound to it.
   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
@@ -2181,6 +2187,83 @@ INSTANTIATE_TEST_SUITE_P(
         omp::OMPScheduleType::Runtime |
             omp::OMPScheduleType::ModifierMonotonic));
 
+TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  omp::OMPScheduleType SchedType = omp::OMPScheduleType::OrderedStaticChunked;
+  uint32_t ChunkSize = 1;
+  Type *LCTy = Type::getInt32Ty(Ctx);
+  Value *StartVal = ConstantInt::get(LCTy, 10);
+  Value *StopVal = ConstantInt::get(LCTy, 52);
+  Value *StepVal = ConstantInt::get(LCTy, 2);
+  Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
+  auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
+
+  CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
+      Loc, LoopBodyGen, StartVal, StopVal, StepVal,
+      /*IsSigned=*/false, /*InclusiveStop=*/false);
+
+  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
+  InsertPointTy AllocaIP = Builder.saveIP();
+
+  // Collect all the info from CLI, as it isn't usable after the call to
+  // applyDynamicWorkshareLoop.
+  InsertPointTy AfterIP = CLI->getAfterIP();
+  BasicBlock *Preheader = CLI->getPreheader();
+  BasicBlock *ExitBlock = CLI->getExit();
+  BasicBlock *LatchBlock = CLI->getLatch();
+  Value *IV = CLI->getIndVar();
+
+  InsertPointTy EndIP =
+      OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
+                                           /*NeedsBarrier=*/true, ChunkVal,
+                                           /*Ordered=*/true);
+
+  // Add a termination to our block and check that it is internally consistent.
+  Builder.restoreIP(EndIP);
+  Builder.CreateRetVoid();
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  CallInst *InitCall = nullptr;
+  for (Instruction &EI : *Preheader) {
+    Instruction *Cur = &EI;
+    if (isa<CallInst>(Cur)) {
+      InitCall = cast<CallInst>(Cur);
+      if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u")
+        break;
+      InitCall = nullptr;
+    }
+  }
+  EXPECT_NE(InitCall, nullptr);
+  EXPECT_EQ(InitCall->arg_size(), 7U);
+  ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
+  EXPECT_EQ(SchedVal->getValue(), static_cast<uint64_t>(SchedType));
+
+  CallInst *FiniCall = dyn_cast<CallInst>(
+      &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
+  ASSERT_NE(FiniCall, nullptr);
+  EXPECT_EQ(FiniCall->getCalledFunction()->getName(),
+            "__kmpc_dispatch_fini_4u");
+  EXPECT_EQ(FiniCall->arg_size(), 2U);
+  EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0));
+  EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1));
+
+  // The original loop iterator should only be used in the condition, in the
+  // increment and in the statement that adds the lower bound to it.
+  EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
+
+  // The exit block should contain the barrier call, plus the call to obtain
+  // the thread ID.
+  size_t NumCallsInExitBlock =
+      count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
+  EXPECT_EQ(NumCallsInExitBlock, 2u);
+}
+
 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
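To make the expectations above easier to follow, here is a rough C-style
sketch of the dispatch loop that applyDynamicWorkshareLoop emits for a 32-bit
induction variable (not literal IR; runtime argument names and signatures are
simplified placeholders):

    __kmpc_dispatch_init_4u(loc, tid, sched, lb, ub, stride, chunk); // preheader
    while (__kmpc_dispatch_next_4u(loc, tid, &last, &lb, &ub, &stride)) {
      for (iv = lb; iv <= ub; iv += stride) {
        // ... loop body ...
        __kmpc_dispatch_fini_4u(loc, tid); // latch; only emitted when Ordered
      }
    }
    __kmpc_barrier(loc, tid); // exit block; only emitted when NeedsBarrier

This is why the test looks for the fini call immediately before the latch
terminator, and why the exit block is expected to contain exactly two calls
(the barrier and the thread-ID lookup it requires).
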
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 0bc267c..4444ad2 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -280,9 +280,9 @@ def SingleOp : OpenMP_Op<"single", [AttrSizedOperandSegments]> {
 def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
                          AllTypesMatch<["lowerBound", "upperBound", "step"]>,
                          RecursiveSideEffects, ReductionClauseInterface]> {
-  let summary = "workshare loop construct";
+  let summary = "worksharing-loop construct";
   let description = [{
-    The workshare loop construct specifies that the iterations of the loop(s)
+    The worksharing-loop construct specifies that the iterations of the loop(s)
     will be executed in parallel by threads in the current context. These
     iterations are spread across threads that already exist in the enclosing
     parallel region. The lower and upper bounds specify a half-open range: the
@@ -332,7 +332,8 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
     implicit barrier at the end of the loop.
 
     The optional `ordered_val` attribute specifies how many loops are associated
-    with the do loop construct.
+    with the worksharing-loop construct. A value of zero means the ordered
+    clause was specified without a parameter.
 
     The optional `order` attribute specifies which order the iterations of the
     associate loops are executed in. Currently the only option for this
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 50f1ef6..0b10550 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -799,32 +799,63 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
 
   bool isSimd = loop.simd_modifier();
 
-  if (schedule == omp::ClauseScheduleKind::Static) {
+  // `orderedVal` is the parameter of the ordered(n) clause:
+  //   orderedVal == -1: no ordered clause was specified.
+  //   orderedVal ==  0: the ordered clause was specified without a parameter.
+  //   orderedVal  >  0: the ordered clause was specified with a parameter (n).
+  // TODO: Handle doacross loop init when orderedVal is greater than 0.
+  int64_t orderedVal =
+      loop.ordered_val().hasValue() ? loop.ordered_val().getValue() : -1;
+  if (schedule == omp::ClauseScheduleKind::Static && orderedVal != 0) {
     ompBuilder->applyWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
                                    !loop.nowait(),
                                    llvm::omp::OMP_SCHEDULE_Static, chunk);
   } else {
     llvm::omp::OMPScheduleType schedType;
     switch (schedule) {
+    case omp::ClauseScheduleKind::Static:
+      if (loop.schedule_chunk_var())
+        schedType = llvm::omp::OMPScheduleType::OrderedStaticChunked;
+      else
+        schedType = llvm::omp::OMPScheduleType::OrderedStatic;
+      break;
     case omp::ClauseScheduleKind::Dynamic:
-      schedType = llvm::omp::OMPScheduleType::DynamicChunked;
+      if (orderedVal == 0)
+        schedType = llvm::omp::OMPScheduleType::OrderedDynamicChunked;
+      else
+        schedType = llvm::omp::OMPScheduleType::DynamicChunked;
       break;
     case omp::ClauseScheduleKind::Guided:
-      if (isSimd)
-        schedType = llvm::omp::OMPScheduleType::GuidedSimd;
-      else
-        schedType = llvm::omp::OMPScheduleType::GuidedChunked;
+      if (orderedVal == 0) {
+        schedType = llvm::omp::OMPScheduleType::OrderedGuidedChunked;
+      } else {
+        if (isSimd)
+          schedType = llvm::omp::OMPScheduleType::GuidedSimd;
+        else
+          schedType = llvm::omp::OMPScheduleType::GuidedChunked;
+      }
       break;
     case omp::ClauseScheduleKind::Auto:
-      schedType = llvm::omp::OMPScheduleType::Auto;
+      if (orderedVal == 0)
+        schedType = llvm::omp::OMPScheduleType::OrderedAuto;
+      else
+        schedType = llvm::omp::OMPScheduleType::Auto;
       break;
     case omp::ClauseScheduleKind::Runtime:
-      if (isSimd)
-        schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
-      else
-        schedType = llvm::omp::OMPScheduleType::Runtime;
+      if (orderedVal == 0) {
+        schedType = llvm::omp::OMPScheduleType::OrderedRuntime;
+      } else {
+        if (isSimd)
+          schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
+        else
+          schedType = llvm::omp::OMPScheduleType::Runtime;
+      }
       break;
     default:
+      if (orderedVal == 0) {
+        schedType = llvm::omp::OMPScheduleType::OrderedStatic;
+        break;
+      }
       llvm_unreachable("Unknown schedule value");
       break;
     }
@@ -841,9 +872,23 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
         // Nothing to do here.
         break;
       }
+    } else {
+      // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
+      // If the static schedule kind is specified or if the ordered clause is
+      // specified, and if the nonmonotonic modifier is not specified, the
+      // effect is as if the monotonic modifier is specified. Otherwise, unless
+      // the monotonic modifier is specified, the effect is as if the
+      // nonmonotonic modifier is specified.
+      // Monotonic is the default in the OpenMP runtime library, so there is
+      // no need to set it explicitly.
+      if (!(schedType == llvm::omp::OMPScheduleType::OrderedStatic ||
+            schedType == llvm::omp::OMPScheduleType::OrderedStaticChunked))
+        schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
     }
+
     ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
-                                          schedType, !loop.nowait(), chunk);
+                                          schedType, !loop.nowait(), chunk,
+                                          /*Ordered=*/orderedVal == 0);
   }
 
   // Continue building IR after the loop. Note that the LoopInfo returned by
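As a reviewer aid for the CHECK lines in the tests below: the i32 scheduling
constants are the OMPScheduleType enumerators from OMPConstants.h above,
optionally OR'ed with the modifier bits (taking ModifierNonmonotonic as
1 << 30, which the hunk above elides; ModifierMonotonic is 1 << 29). A few
spot checks in plain C++, illustrative only:

    static_assert((35 | (1u << 30)) == 1073741859u); // DynamicChunked, nonmonotonic
    static_assert((67 | (1u << 30)) == 1073741891u); // OrderedDynamicChunked, nonmonotonic
    static_assert((67 | (1u << 29)) == 536870979u);  // OrderedDynamicChunked, monotonic
    static_assert((68 | (1u << 30)) == 1073741892u); // OrderedGuidedChunked, nonmonotonic
    static_assert((69 | (1u << 30)) == 1073741893u); // OrderedRuntime, nonmonotonic
    static_assert((70 | (1u << 30)) == 1073741894u); // OrderedAuto, nonmonotonic
    // OrderedStatic (66) and OrderedStaticChunked (65) stay monotonic by
    // default, so they appear without a modifier bit.
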
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index b072bf8..8963a64 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -496,7 +496,7 @@ llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64
   %chunk_size_const = llvm.mlir.constant(2 : i16) : i16
   omp.wsloop schedule(dynamic = %chunk_size_const : i16)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -517,7 +517,7 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
   omp.wsloop schedule(dynamic = %chunk_size_var : i16)
   for (%iv) : i32 = (%lb) to (%ub) step (%step) {
     // CHECK: %[[CHUNK_SIZE:.*]] = sext i16 %{{.*}} to i32
-    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
+    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -538,7 +538,7 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
   omp.wsloop schedule(dynamic = %chunk_size_var : i64)
   for (%iv) : i32 = (%lb) to (%ub) step (%step) {
     // CHECK: %[[CHUNK_SIZE:.*]] = trunc i64 %{{.*}} to i32
-    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
+    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -555,7 +555,7 @@ llvm.func @body(i32)
 llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32, %chunk_size : i32) -> () {
   omp.wsloop schedule(dynamic = %chunk_size : i32)
   for (%iv) : i32 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
+    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -650,6 +650,10 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
   llvm.return
 }
 
+// -----
+
+llvm.func @body(i64)
+
 llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop schedule(runtime, simd)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
@@ -663,6 +667,10 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
   llvm.return
 }
 
+// -----
+
+llvm.func @body(i64)
+
 llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop schedule(guided, simd)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
@@ -720,6 +728,169 @@ llvm.func @simdloop_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 :
 
 // -----
 
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(static) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i32)
+
+llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i32) -> () {
+  %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
+  omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0)
+  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 65, i32 1, i32 %{{.*}}, i32 1, i32 1)
+    // CHECK: call void @__kmpc_dispatch_fini_4u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i32) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(dynamic) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(auto) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741894, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(runtime) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741893, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(guided) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741892, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(dynamic, nonmonotonic) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(dynamic, monotonic) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 536870979, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
 omp.critical.declare @mutex hint(contended)
 
 // CHECK-LABEL: @omp_critical
-- 
2.7.4