return bodyGenStatus;
}
+/// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
+static LogicalResult
+convertOmpTaskgroupOp(omp::TaskGroupOp tgOp, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ LogicalResult bodyGenStatus = success();
+ if (!tgOp.task_reduction_vars().empty() || !tgOp.allocate_vars().empty()) {
+ return tgOp.emitError("unhandled clauses for translation to LLVM IR");
+ }
+ auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
+ builder.restoreIP(codegenIP);
+ convertOmpOpRegions(tgOp.region(), "omp.taskgroup.region", builder,
+ moduleTranslation, bodyGenStatus);
+ };
+ InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup(
+ ompLoc, allocaIP, bodyCB));
+ return bodyGenStatus;
+}
+
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
.Case([&](omp::TaskOp op) {
return convertOmpTaskOp(op, builder, moduleTranslation);
})
+ .Case([&](omp::TaskGroupOp op) {
+ return convertOmpTaskgroupOp(op, builder, moduleTranslation);
+ })
.Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
omp::CriticalDeclareOp>([](auto op) {
// `yield` and `terminator` can be just omitted. The block structure
// CHECK: call void @[[outlined_fn]](ptr %[[task_data]])
// CHECK: ret i32 0
// CHECK: }
+
+// -----
+
+llvm.func @foo() -> ()
+
+llvm.func @omp_taskgroup(%x: i32, %y: i32, %zaddr: !llvm.ptr<i32>) {
+ omp.taskgroup {
+ llvm.call @foo() : () -> ()
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define void @omp_taskgroup(
+// CHECK-SAME: i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]])
+// CHECK: br label %[[entry:[^,]+]]
+// CHECK: [[entry]]:
+// CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}})
+// CHECK: call void @__kmpc_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]])
+// CHECK: br label %[[omp_taskgroup_region:[^,]+]]
+// CHECK: [[omp_taskgroup_region]]:
+// CHECK: call void @foo()
+// CHECK: br label %[[omp_region_cont:[^,]+]]
+// CHECK: [[omp_region_cont]]:
+// CHECK: br label %[[taskgroup_exit:[^,]+]]
+// CHECK: [[taskgroup_exit]]:
+// CHECK: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]])
+// CHECK: ret void
+
+// -----
+
+llvm.func @foo() -> ()
+llvm.func @bar(i32, i32, !llvm.ptr<i32>) -> ()
+
+llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr<i32>) {
+ omp.taskgroup {
+ %c1 = llvm.mlir.constant(1) : i32
+ %ptr1 = llvm.alloca %c1 x i8 : (i32) -> !llvm.ptr<i8>
+ omp.task {
+ llvm.call @foo() : () -> ()
+ omp.terminator
+ }
+ omp.task {
+ llvm.call @bar(%x, %y, %zaddr) : (i32, i32, !llvm.ptr<i32>) -> ()
+ omp.terminator
+ }
+ llvm.br ^bb1
+ ^bb1:
+ llvm.call @foo() : () -> ()
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define void @omp_taskgroup_task(
+// CHECK-SAME: i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]])
+// CHECK: %[[structArg:.+]] = alloca { i32, i32, ptr }, align 8
+// CHECK: br label %[[entry:[^,]+]]
+// CHECK: [[entry]]: ; preds = %3
+// CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}})
+// CHECK: call void @__kmpc_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]])
+// CHECK: br label %[[omp_taskgroup_region:[^,]+]]
+// CHECK: [[omp_taskgroup_region1:.+]]:
+// CHECK: call void @foo()
+// CHECK: br label %[[omp_region_cont:[^,]+]]
+// CHECK: [[omp_taskgroup_region]]:
+// CHECK: %{{.+}} = alloca i8, align 1
+// CHECK: br label %[[codeRepl:[^,]+]]
+// CHECK: [[codeRepl]]:
+// CHECK: %[[omp_global_thread_num_t1:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}})
+// CHECK: %[[t1_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], i32 1, i64 0, i64 0, ptr @omp_taskgroup_task..omp_par.wrapper)
+// CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num_t1]], ptr %[[t1_alloc]])
+// CHECK: br label %[[task_exit:[^,]+]]
+// CHECK: [[task_exit]]:
+// CHECK: br label %[[codeRepl9:[^,]+]]
+// CHECK: [[codeRepl9]]:
+// CHECK: %[[gep1:.+]] = getelementptr { i32, i32, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK: store i32 %[[x]], ptr %[[gep1]], align 4
+// CHECK: %[[gep2:.+]] = getelementptr { i32, i32, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK: store i32 %[[y]], ptr %[[gep2]], align 4
+// CHECK: %[[gep3:.+]] = getelementptr { i32, i32, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK: store ptr %[[zaddr]], ptr %[[gep3]], align 8
+// CHECK: %[[omp_global_thread_num_t2:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}})
+// CHECK: %[[t2_alloc:.+]] = call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], i32 1, i64 16, i64 0, ptr @omp_taskgroup_task..omp_par.1.wrapper)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[t2_alloc]], ptr align 8 %[[structArg]], i64 16, i1 false)
+// CHECK: %{{.+}} = call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num_t2]], ptr %[[t2_alloc]])
+// CHECK: br label %[[task_exit3:[^,]+]]
+// CHECK: [[task_exit3]]:
+// CHECK: br label %[[omp_taskgroup_region1]]
+// CHECK: [[omp_region_cont]]:
+// CHECK: br label %[[taskgroup_exit:[^,]+]]
+// CHECK: [[taskgroup_exit]]:
+// CHECK: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]])
+// CHECK: ret void
+// CHECK: }