From 28d85d207fc37b5593c17a25f687c91b7afda5b4 Mon Sep 17 00:00:00 2001 From: Jan Sjodin Date: Wed, 26 Apr 2023 16:39:14 -0400 Subject: [PATCH] [OpenMP][Flang][MLIR] Add lowering of TargetOp for host codegen to LLVM-IR Fix uninitialized value use introduced in d3f9388ffb889d2ef512a17b9c4d37d09f03c693 --- .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 19 +++-- mlir/test/Target/LLVMIR/omptarget-llvm.mlir | 85 ---------------------- mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir | 32 ++++++++ .../LLVMIR/omptarget-region-parallel-llvm.mlir | 52 +++++++++++++ 4 files changed, 96 insertions(+), 92 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir create mode 100644 mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 2737880..6249753 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1576,9 +1576,9 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, return success(); } -static llvm::TargetRegionEntryInfo -getTargetEntryUniqueInfo(omp::TargetOp targetOp, - llvm::StringRef parentName = "") { +static bool getTargetEntryUniqueInfo(llvm::TargetRegionEntryInfo &targetInfo, + omp::TargetOp targetOp, + llvm::StringRef parentName = "") { auto fileLoc = targetOp.getLoc()->findInstanceOf(); assert(fileLoc && "No file found from location"); @@ -1587,11 +1587,13 @@ getTargetEntryUniqueInfo(omp::TargetOp targetOp, llvm::sys::fs::UniqueID id; if (auto ec = llvm::sys::fs::getUniqueID(fileName, id)) { targetOp.emitError("Unable to get unique ID for file"); + return false; } uint64_t line = fileLoc.getLine(); - return llvm::TargetRegionEntryInfo(parentName, id.getDevice(), id.getFile(), - line); + targetInfo = llvm::TargetRegionEntryInfo(parentName, 
id.getDevice(), + id.getFile(), line); + return true; } static bool targetOpSupported(Operation &opInst) { @@ -1660,8 +1662,11 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); StringRef parentName = opInst.getParentOfType().getName(); - llvm::TargetRegionEntryInfo entryInfo = - getTargetEntryUniqueInfo(targetOp, parentName); + llvm::TargetRegionEntryInfo entryInfo; + + if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName)) + return failure(); + int32_t defaultValTeams = -1; int32_t defaultValThreads = -1; diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index 331034c..7f2a45f 100644 --- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -175,88 +175,3 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr>, %3 : !llv // ----- -module attributes {omp.is_device = #omp.isdevice} { - llvm.func @omp_target_region_() { - %0 = llvm.mlir.constant(20 : i32) : i32 - %1 = llvm.mlir.constant(10 : i32) : i32 - %2 = llvm.mlir.constant(1 : i64) : i64 - %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr - %4 = llvm.mlir.constant(1 : i64) : i64 - %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr - %6 = llvm.mlir.constant(1 : i64) : i64 - %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr - llvm.store %1, %3 : !llvm.ptr - llvm.store %0, %5 : !llvm.ptr - omp.target { - %8 = llvm.load %3 : !llvm.ptr - %9 = llvm.load %5 : !llvm.ptr - %10 = llvm.add %8, %9 : i32 - llvm.store %10, %7 : !llvm.ptr - omp.terminator - } - llvm.return - } -} - -// CHECK: call void 
@__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_region__l[[LINE:.*]](ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) -// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]](ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]]) -// CHECK: %[[VAL_A:.*]] = load i32, ptr %[[ADDR_A]], align 4 -// CHECK: %[[VAL_B:.*]] = load i32, ptr %[[ADDR_B]], align 4 -// CHECK: %[[SUM:.*]] = add i32 %[[VAL_A]], %[[VAL_B]] -// CHECK: store i32 %[[SUM]], ptr %[[ADDR_C]], align 4 - -// ----- - -module attributes {omp.is_device = #omp.isdevice} { - llvm.func @omp_target_region_() { - %0 = llvm.mlir.constant(20 : i32) : i32 - %1 = llvm.mlir.constant(10 : i32) : i32 - %2 = llvm.mlir.constant(1 : i64) : i64 - %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr - %4 = llvm.mlir.constant(1 : i64) : i64 - %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr - %6 = llvm.mlir.constant(1 : i64) : i64 - %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr - llvm.store %1, %3 : !llvm.ptr - llvm.store %0, %5 : !llvm.ptr - omp.target { - omp.parallel { - %8 = llvm.load %3 : !llvm.ptr - %9 = llvm.load %5 : !llvm.ptr - %10 = llvm.add %8, %9 : i32 - llvm.store %10, %7 : !llvm.ptr - omp.terminator - } - omp.terminator - } - llvm.return - } -} - -// CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_region__l[[LINE:.*]](ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) -// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]](ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]]) -// CHECK: %[[STRUCTARG:.*]] = alloca { ptr, ptr, ptr }, align 8 -// CHECK: %[[GEP1:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 0 
-// CHECK: store ptr %[[ADDR_A]], ptr %[[GEP1]], align 8 -// CHECK: %[[GEP2:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 1 -// CHECK: store ptr %[[ADDR_B]], ptr %[[GEP2]], align 8 -// CHECK: %[[GEP3:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 2 -// CHECK: store ptr %[[ADDR_C]], ptr %[[GEP3]], align 8 -// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par, ptr %[[STRUCTARG]]) - - -// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %[[STRUCTARG2:.*]]) #0 { -// CHECK: %[[GEP4:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG2]], i32 0, i32 0 -// CHECK: %[[LOADGEP1:.*]] = load ptr, ptr %[[GEP4]], align 8 -// CHECK: %[[GEP5:.*]] = getelementptr { ptr, ptr, ptr }, ptr %0, i32 0, i32 1 -// CHECK: %[[LOADGEP2:.*]] = load ptr, ptr %[[GEP5]], align 8 -// CHECK: %[[GEP6:.*]] = getelementptr { ptr, ptr, ptr }, ptr %0, i32 0, i32 2 -// CHECK: %[[LOADGEP3:.*]] = load ptr, ptr %[[GEP6]], align 8 - -// CHECK: %[[VAL_A:.*]] = load i32, ptr %[[LOADGEP1]], align 4 -// CHECK: %[[VAL_B:.*]] = load i32, ptr %[[LOADGEP2]], align 4 -// CHECK: %[[SUM:.*]] = add i32 %[[VAL_A]], %[[VAL_B]] -// CHECK: store i32 %[[SUM]], ptr %[[LOADGEP3]], align 4 - -// ----- - diff --git a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir new file mode 100644 index 0000000..68b2f49 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir @@ -0,0 +1,32 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +module attributes {omp.is_device = #omp.isdevice} { + llvm.func @omp_target_region_() { + %0 = llvm.mlir.constant(20 : i32) : i32 + %1 = llvm.mlir.constant(10 : i32) : i32 + %2 = llvm.mlir.constant(1 : i64) : i64 + %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, 
operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr + %6 = llvm.mlir.constant(1 : i64) : i64 + %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr + llvm.store %1, %3 : !llvm.ptr + llvm.store %0, %5 : !llvm.ptr + omp.target { + %8 = llvm.load %3 : !llvm.ptr + %9 = llvm.load %5 : !llvm.ptr + %10 = llvm.add %8, %9 : i32 + llvm.store %10, %7 : !llvm.ptr + omp.terminator + } + llvm.return + } +} + +// CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_region__l[[LINE:.*]](ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) +// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]](ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]]) +// CHECK: %[[VAL_A:.*]] = load i32, ptr %[[ADDR_A]], align 4 +// CHECK: %[[VAL_B:.*]] = load i32, ptr %[[ADDR_B]], align 4 +// CHECK: %[[SUM:.*]] = add i32 %[[VAL_A]], %[[VAL_B]] +// CHECK: store i32 %[[SUM]], ptr %[[ADDR_C]], align 4 + diff --git a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir new file mode 100644 index 0000000..455edda --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir @@ -0,0 +1,52 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +module attributes {omp.is_device = #omp.isdevice} { + llvm.func @omp_target_region_() { + %0 = llvm.mlir.constant(20 : i32) : i32 + %1 = llvm.mlir.constant(10 : i32) : i32 + %2 = llvm.mlir.constant(1 : i64) : i64 + %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 
= llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr + %6 = llvm.mlir.constant(1 : i64) : i64 + %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr + llvm.store %1, %3 : !llvm.ptr + llvm.store %0, %5 : !llvm.ptr + omp.target { + omp.parallel { + %8 = llvm.load %3 : !llvm.ptr + %9 = llvm.load %5 : !llvm.ptr + %10 = llvm.add %8, %9 : i32 + llvm.store %10, %7 : !llvm.ptr + omp.terminator + } + omp.terminator + } + llvm.return + } +} + +// CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_region__l[[LINE:.*]](ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) +// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]](ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]]) +// CHECK: %[[STRUCTARG:.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK: %[[GEP1:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 0 +// CHECK: store ptr %[[ADDR_A]], ptr %[[GEP1]], align 8 +// CHECK: %[[GEP2:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 1 +// CHECK: store ptr %[[ADDR_B]], ptr %[[GEP2]], align 8 +// CHECK: %[[GEP3:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 2 +// CHECK: store ptr %[[ADDR_C]], ptr %[[GEP3]], align 8 +// CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @1, i32 1, ptr @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par, ptr %[[STRUCTARG]]) + + +// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %[[STRUCTARG2:.*]]) #0 { +// CHECK: %[[GEP4:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG2]], i32 0, i32 0 +// CHECK: %[[LOADGEP1:.*]] = load ptr, ptr %[[GEP4]], align 8 +// CHECK: %[[GEP5:.*]] = getelementptr { ptr, ptr, ptr }, ptr %0, i32 0, i32 1 +// CHECK: %[[LOADGEP2:.*]] = load ptr, ptr %[[GEP5]], align 8 +// CHECK: %[[GEP6:.*]] = getelementptr { ptr, ptr, ptr }, ptr %0, i32 0, i32 2 +// CHECK: %[[LOADGEP3:.*]] = load ptr, ptr %[[GEP6]], align 8 + +// CHECK: %[[VAL_A:.*]] = load i32, ptr %[[LOADGEP1]], align 4 +// CHECK: %[[VAL_B:.*]] = load i32, ptr %[[LOADGEP2]], align 4 +// CHECK: %[[SUM:.*]] = add i32 %[[VAL_A]], %[[VAL_B]] +// CHECK: store i32 %[[SUM]], ptr %[[LOADGEP3]], align 4 -- 2.7.4