loops with ordered clause must be generated the same way as dynamic loops, but with static scheduleing.
llvm-svn: 237788
OMP_sch_auto = 38,
/// \brief Lower bound for 'ordered' versions.
OMP_ord_lower = 64,
- /// \brief Lower bound for 'nomerge' versions.
- OMP_nm_lower = 160,
+ OMP_ord_static_chunked = 65,
+ OMP_ord_static = 66,
+ OMP_ord_dynamic_chunked = 67,
+ OMP_ord_guided_chunked = 68,
+ OMP_ord_runtime = 69,
+ OMP_ord_auto = 70,
+ OMP_sch_default = OMP_sch_static,
};
/// \brief Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
- bool Chunked) {
+ bool Chunked, bool Ordered) {
switch (ScheduleKind) {
case OMPC_SCHEDULE_static:
- return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
+ return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
+ : (Ordered ? OMP_ord_static : OMP_sch_static);
case OMPC_SCHEDULE_dynamic:
- return OMP_sch_dynamic_chunked;
+ return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
case OMPC_SCHEDULE_guided:
- return OMP_sch_guided_chunked;
- case OMPC_SCHEDULE_auto:
- return OMP_sch_auto;
+ return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
case OMPC_SCHEDULE_runtime:
- return OMP_sch_runtime;
+ return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
+ case OMPC_SCHEDULE_auto:
+ return Ordered ? OMP_ord_auto : OMP_sch_auto;
case OMPC_SCHEDULE_unknown:
assert(!Chunked && "chunk was specified but schedule kind not known");
- return OMP_sch_static;
+ return Ordered ? OMP_ord_static : OMP_sch_static;
}
llvm_unreachable("Unexpected runtime schedule");
}
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
bool Chunked) const {
- auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+ auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
return Schedule == OMP_sch_static;
}
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
- auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
+ auto Schedule =
+ getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
return Schedule != OMP_sch_static;
}
void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPScheduleClauseKind ScheduleKind,
- unsigned IVSize, bool IVSigned,
+ unsigned IVSize, bool IVSigned, bool Ordered,
llvm::Value *IL, llvm::Value *LB,
llvm::Value *UB, llvm::Value *ST,
llvm::Value *Chunk) {
- OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
- if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
+ OpenMPSchedType Schedule =
+ getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
+ if (Ordered ||
+ (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
+ Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
// Call __kmpc_dispatch_init(
// ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
// kmp_int[32|64] lower, kmp_int[32|64] upper,
// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
// kmp_int[32|64] incr, kmp_int[32|64] chunk);
if (Chunk == nullptr) {
- assert(Schedule == OMP_sch_static &&
+ assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
"expected static non-chunked schedule");
// If the Chunk was not specified in the clause - use default value 1.
Chunk = CGF.Builder.getIntN(IVSize, 1);
} else
- assert(Schedule == OMP_sch_static_chunked &&
+ assert((Schedule == OMP_sch_static_chunked ||
+ Schedule == OMP_ord_static_chunked) &&
"expected static chunked schedule");
llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
getThreadID(CGF, Loc),
Args);
}
-void CGOpenMPRuntime::emitForOrderedDynamicIterationEnd(CodeGenFunction &CGF,
- SourceLocation Loc,
- unsigned IVSize,
- bool IVSigned) {
+void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ unsigned IVSize,
+ bool IVSigned) {
// Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
getThreadID(CGF, Loc)};
/// \param SchedKind Schedule kind, specified by the 'schedule' clause.
/// \param IVSize Size of the iteration variable in bits.
/// \param IVSigned Sign of the interation variable.
+ /// \param Ordered true if loop is ordered, false otherwise.
/// \param IL Address of the output variable in which the flag of the
/// last iteration is returned.
/// \param LB Address of the output variable in which the lower iteration
///
virtual void emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPScheduleClauseKind SchedKind, unsigned IVSize,
- bool IVSigned, llvm::Value *IL, llvm::Value *LB,
- llvm::Value *UB, llvm::Value *ST,
+ bool IVSigned, bool Ordered, llvm::Value *IL,
+ llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
llvm::Value *Chunk = nullptr);
/// \brief Call the appropriate runtime routine to notify that we finished
/// \param IVSize Size of the iteration variable in bits.
/// \param IVSigned Sign of the interation variable.
///
- virtual void emitForOrderedDynamicIterationEnd(CodeGenFunction &CGF,
- SourceLocation Loc,
- unsigned IVSize,
- bool IVSigned);
+ virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF,
+ SourceLocation Loc, unsigned IVSize,
+ bool IVSigned);
/// \brief Call the appropriate runtime routine to notify that we finished
/// all the work with current loop.
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
const OMPLoopDirective &S,
OMPPrivateScope &LoopScope,
- llvm::Value *LB, llvm::Value *UB,
- llvm::Value *ST, llvm::Value *IL,
- llvm::Value *Chunk) {
+ bool Ordered, llvm::Value *LB,
+ llvm::Value *UB, llvm::Value *ST,
+ llvm::Value *IL, llvm::Value *Chunk) {
auto &RT = CGM.getOpenMPRuntime();
// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
- const bool Dynamic = RT.isDynamic(ScheduleKind);
+ const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);
- assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+ assert((Ordered ||
+ !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
"static non-chunked schedule does not need outer loop");
// Emit outer loop.
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
RT.emitForInit(
- *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
- (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
- Chunk);
+ *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, IL, LB,
+ (DynamicOrOrdered ? EmitAnyExpr(S.getLastIteration()).getScalarVal()
+ : UB),
+ ST, Chunk);
auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
LoopStack.push(CondBlock);
llvm::Value *BoolCondVal = nullptr;
- if (!Dynamic) {
+ if (!DynamicOrOrdered) {
// UB = min(UB, GlobalUB)
EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB
// Emit "IV = LB" (in case of static schedule, we have already calculated new
// LB for loop condition and emitted it above).
- if (Dynamic)
+ if (DynamicOrOrdered)
EmitIgnoredExpr(S.getInit());
// Create a block for the increment.
auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
- bool DynamicWithOrderedClause =
- Dynamic && S.getSingleClause(OMPC_ordered) != nullptr;
SourceLocation Loc = S.getLocStart();
// Generate !llvm.loop.parallel metadata for loads and stores for loops with
// dynamic/guided scheduling and without ordered clause.
LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
ScheduleKind == OMPC_SCHEDULE_guided) &&
- !DynamicWithOrderedClause);
+ !Ordered);
EmitOMPInnerLoop(
S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false),
S.getInc(),
CGF.EmitOMPLoopBody(S);
CGF.EmitStopPoint(&S);
},
- [DynamicWithOrderedClause, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
- if (DynamicWithOrderedClause) {
- CGF.CGM.getOpenMPRuntime().emitForOrderedDynamicIterationEnd(
+ [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
+ if (Ordered) {
+ CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
CGF, Loc, IVSize, IVSigned);
}
});
EmitBlock(Continue.getBlock());
BreakContinueStack.pop_back();
- if (!Dynamic) {
+ if (!DynamicOrOrdered) {
// Emit "LB = LB + Stride", "UB = UB + Stride".
EmitIgnoredExpr(S.getNextLowerBound());
EmitIgnoredExpr(S.getNextUpperBound());
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
- if (!Dynamic)
+ if (!DynamicOrOrdered)
RT.emitForStaticFinish(*this, S.getLocEnd());
}
ScheduleKind = ScheduleInfo.second;
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+ const bool Ordered = S.getSingleClause(OMPC_ordered) != nullptr;
if (RT.isStaticNonchunked(ScheduleKind,
- /* Chunked */ Chunk != nullptr)) {
+ /* Chunked */ Chunk != nullptr) &&
+ !Ordered) {
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
// When no chunk_size is specified, the iteration space is divided into
// chunks that are approximately equal in size, and at most one chunk is
// distributed to each thread. Note that the size of the chunks is
// unspecified in this case.
RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
- IL.getAddress(), LB.getAddress(), UB.getAddress(),
- ST.getAddress());
+ Ordered, IL.getAddress(), LB.getAddress(),
+ UB.getAddress(), ST.getAddress());
// UB = min(UB, GlobalUB);
EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB;
} else {
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
- EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
- UB.getAddress(), ST.getAddress(), IL.getAddress(),
- Chunk);
+ EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered,
+ LB.getAddress(), UB.getAddress(), ST.getAddress(),
+ IL.getAddress(), Chunk);
}
EmitOMPReductionClauseFinal(S);
// Emit final copy of the lastprivate variables if IsLastIter != 0.
// Emit static non-chunked loop.
CGF.CGM.getOpenMPRuntime().emitForInit(
CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
- /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
- ST.getAddress());
+ /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
+ LB.getAddress(), UB.getAddress(), ST.getAddress());
// UB = min(UB, GlobalUB);
auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
bool EmitOMPWorksharingLoop(const OMPLoopDirective &S);
void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
const OMPLoopDirective &S,
- OMPPrivateScope &LoopScope, llvm::Value *LB,
- llvm::Value *UB, llvm::Value *ST, llvm::Value *IL,
- llvm::Value *Chunk);
+ OMPPrivateScope &LoopScope, bool Ordered,
+ llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
+ llvm::Value *IL, llvm::Value *Chunk);
public:
void static_not_chunked(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
#pragma omp for schedule(static) ordered
-// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
-// UB = min(UB, GlobalUB)
-// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
-// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
-// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
-// CHECK: [[UBRESULT:%.+]] = phi i32 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
-// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]]
-// CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
-// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
+// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+//
+// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
+// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
+// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
+
// Loop header
+// CHECK: [[O_LOOP1_BODY]]
+// CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
+// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
+
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
+// CHECK-NEXT: call void @__kmpc_dispatch_fini_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK-NEXT: br label %{{.+}}
}
// CHECK: [[LOOP1_END]]
-// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK: [[O_LOOP1_END]]
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
// CHECK: ret void
}
void dynamic1(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
#pragma omp for schedule(dynamic) ordered
-// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 35, i64 0, i64 16908287, i64 1, i64 1)
+// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
//
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
unsigned int y = 0;
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
#pragma omp for schedule(auto) collapse(2) ordered
-// CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 38, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1)
+// CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 70, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1)
//
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
int x = 0;
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
#pragma omp for collapse(2) schedule(runtime) ordered
-// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 37, i32 0, i32 199, i32 1, i32 1)
+// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 69, i32 0, i32 199, i32 1, i32 1)
//
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0