From 0e9198c3e95adced7213999dcd14daed4acfd16c Mon Sep 17 00:00:00 2001 From: Arnamoy Bhattacharyya Date: Tue, 15 Mar 2022 09:41:04 -0400 Subject: [PATCH] [MLIR][OpenMP] Add support for basic SIMD construct Patch adds a new operation for the SIMD construct. The op is designed to be very similar to the existing `wsloop` operation, so that the `CanonicalLoopInfo` of `OpenMPIRBuilder` can be used. Reviewed By: shraiysh Differential Revision: https://reviews.llvm.org/D118065 --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 46 ++++++++++++- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 74 ++++++++++++++++++++ .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 79 ++++++++++++++++++++++ mlir/test/Dialect/OpenMP/invalid.mlir | 13 ++++ mlir/test/Dialect/OpenMP/ops.mlir | 31 +++++++++ mlir/test/Target/LLVMIR/openmp-llvm.mlir | 42 ++++++++++++ 6 files changed, 284 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index aedd1e5..0cf9918 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -308,9 +308,53 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, let hasVerifier = 1; } +//===----------------------------------------------------------------------===// +// Simd construct [2.9.3.1] +//===----------------------------------------------------------------------===// + +def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments, + AllTypesMatch<["lowerBound", "upperBound", "step"]>]> { + let summary = "simd loop construct"; + let description = [{ + The simd construct can be applied to a loop to indicate that the loop can be + transformed into a SIMD loop (that is, multiple iterations of the loop can + be executed concurrently using SIMD instructions). The lower and upper + bounds specify a half-open range: the range includes the lower bound but + does not include the upper bound. 
+ + The body region can contain any number of blocks. The region is terminated + by "omp.yield" instruction without operands. + ``` + omp.simdloop (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) + step (%c1, %c1) { + // block operations + omp.yield + } + ``` + }]; + + // TODO: Add other clauses + let arguments = (ins Variadic<IntLikeType>:$lowerBound, + Variadic<IntLikeType>:$upperBound, + Variadic<IntLikeType>:$step); + + let regions = (region AnyRegion:$region); + + let extraClassDeclaration = [{ + /// Returns the number of loops in the simd loop nest. + unsigned getNumLoops() { return lowerBound().size(); } + + }]; + + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; +} + + def YieldOp : OpenMP_Op<"yield", [NoSideEffect, ReturnLike, Terminator, - ParentOneOf<["WsLoopOp", "ReductionDeclareOp", "AtomicUpdateOp"]>]> { + ParentOneOf<["WsLoopOp", "ReductionDeclareOp", + "AtomicUpdateOp", "SimdLoopOp"]>]> { let summary = "loop yield and termination operation"; let description = [{ "omp.yield" yields SSA values from the OpenMP dialect op region and diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 15e9cbc..774b6b3 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -839,6 +839,80 @@ void WsLoopOp::print(OpAsmPrinter &p) { } //===----------------------------------------------------------------------===// +// SimdLoopOp +//===----------------------------------------------------------------------===// +/// Parses an OpenMP Simd construct [2.9.3.1] +/// +/// simdloop ::= `omp.simdloop` loop-control clause-list +/// loop-control ::= `(` ssa-id-list `)` `:` type `=` loop-bounds +/// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` steps +/// steps := `step` `(`ssa-id-list`)` +/// clause-list ::= clause clause-list | empty +/// clause ::= TODO +ParseResult SimdLoopOp::parse(OpAsmParser &parser, OperationState &result) { + // Parse an opening `(` followed by induction variables 
followed by `)` + SmallVector<OpAsmParser::OperandType> ivs; + if (parser.parseRegionArgumentList(ivs, /*requiredOperandCount=*/-1, + OpAsmParser::Delimiter::Paren)) + return failure(); + int numIVs = static_cast<int>(ivs.size()); + Type loopVarType; + if (parser.parseColonType(loopVarType)) + return failure(); + // Parse loop bounds. + SmallVector<OpAsmParser::OperandType> lower; + if (parser.parseEqual() || + parser.parseOperandList(lower, numIVs, OpAsmParser::Delimiter::Paren) || + parser.resolveOperands(lower, loopVarType, result.operands)) + return failure(); + SmallVector<OpAsmParser::OperandType> upper; + if (parser.parseKeyword("to") || + parser.parseOperandList(upper, numIVs, OpAsmParser::Delimiter::Paren) || + parser.resolveOperands(upper, loopVarType, result.operands)) + return failure(); + + // Parse step values. + SmallVector<OpAsmParser::OperandType> steps; + if (parser.parseKeyword("step") || + parser.parseOperandList(steps, numIVs, OpAsmParser::Delimiter::Paren) || + parser.resolveOperands(steps, loopVarType, result.operands)) + return failure(); + + SmallVector<int> segments{numIVs, numIVs, numIVs}; + // TODO: Add parseClauses() when we support clauses + result.addAttribute("operand_segment_sizes", + parser.getBuilder().getI32VectorAttr(segments)); + + // Now parse the body. 
+ Region *body = result.addRegion(); + SmallVector<Type> ivTypes(numIVs, loopVarType); + SmallVector<OpAsmParser::OperandType> blockArgs(ivs); + if (parser.parseRegion(*body, blockArgs, ivTypes)) + return failure(); + return success(); +} + +void SimdLoopOp::print(OpAsmPrinter &p) { + auto args = getRegion().front().getArguments(); + p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound() + << ") to (" << upperBound() << ") "; + p << "step (" << step() << ") "; + + p.printRegion(region(), /*printEntryBlockArgs=*/false); +} + +//===----------------------------------------------------------------------===// +// Verifier for Simd construct [2.9.3.1] +//===----------------------------------------------------------------------===// + +LogicalResult SimdLoopOp::verify() { + if (this->lowerBound().empty()) { + return emitOpError() << "empty lowerbound for simd loop operation"; + } + return success(); +} + +//===----------------------------------------------------------------------===// // ReductionOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 21f2d01..e1652af 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -880,6 +880,82 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, return success(); } +/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder. +static LogicalResult +convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + auto loop = cast<omp::SimdLoopOp>(opInst); + + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + + // Generator of the canonical loop body. + // TODO: support error propagation in OpenMPIRBuilder and use it instead of + // relying on captured variables. 
+ SmallVector<llvm::CanonicalLoopInfo *> loopInfos; + SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints; + LogicalResult bodyGenStatus = success(); + auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { + // Make sure further conversions know about the induction variable. + moduleTranslation.mapValue( + loop.getRegion().front().getArgument(loopInfos.size()), iv); + + // Capture the body insertion point for use in nested loops. BodyIP of the + // CanonicalLoopInfo always points to the beginning of the entry block of + // the body. + bodyInsertPoints.push_back(ip); + + if (loopInfos.size() != loop.getNumLoops() - 1) + return; + + // Convert the body of the loop. + llvm::BasicBlock *entryBlock = ip.getBlock(); + llvm::BasicBlock *exitBlock = + entryBlock->splitBasicBlock(ip.getPoint(), "omp.simdloop.exit"); + convertOmpOpRegions(loop.region(), "omp.simdloop.region", *entryBlock, + *exitBlock, builder, moduleTranslation, bodyGenStatus); + }; + + // Delegate actual loop construction to the OpenMP IRBuilder. + // TODO: this assumes SCF-like loops (positive step, signed integers); + // reconsider when SimdLoop supports more cases. NOTE(review): Inclusive=true + // below conflicts with the half-open range in the op description; confirm. + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) { + llvm::Value *lowerBound = + moduleTranslation.lookupValue(loop.lowerBound()[i]); + llvm::Value *upperBound = + moduleTranslation.lookupValue(loop.upperBound()[i]); + llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]); + + // Make sure loop trip counts are emitted in the preheader of the outermost + // loop at the latest so that they are all available for the new collapsed + // loop that will be created below. 
+ llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; + llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; + if (i != 0) { + loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), + ompLoc.DL); + computeIP = loopInfos.front()->getPreheaderIP(); + } + loopInfos.push_back(ompBuilder->createCanonicalLoop( + loc, bodyGen, lowerBound, upperBound, step, + /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); + + if (failed(bodyGenStatus)) + return failure(); + } + + // Collapse loops. + llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); + llvm::CanonicalLoopInfo *loopInfo = + ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); + + ompBuilder->applySimd(ompLoc.DL, loopInfo); + + builder.restoreIP(afterIP); + return success(); +} + /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. llvm::AtomicOrdering convertAtomicOrdering(Optional ao) { @@ -1160,6 +1236,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::WsLoopOp) { return convertOmpWsLoop(*op, builder, moduleTranslation); }) + .Case([&](omp::SimdLoopOp) { + return convertOmpSimdLoop(*op, builder, moduleTranslation); + }) .Case([&](omp::AtomicReadOp) { return convertOmpAtomicRead(*op, builder, moduleTranslation); }) diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index bbfc1e9..d871b43 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -182,6 +182,19 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier3(%lb : i64, %ub : i64, %step : // ----- +func @omp_simdloop(%lb : index, %ub : index, %step : i32) -> () { + // expected-error @below {{op failed to verify that all of {lowerBound, upperBound, step} have same type}} + "omp.simdloop" (%lb, %ub, %step) ({ + ^bb0(%iv: index): + omp.yield + }) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} : + (index, index, i32) -> () + + return +} + +// ----- + // expected-error 
@below {{op expects initializer region with one argument of the reduction type}} omp.reduction.declare @add_f32 : f64 init { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 62776e2..9e66295 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -299,6 +299,37 @@ func @omp_wsloop_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i3 return } +// CHECK-LABEL: omp_simdloop +func @omp_simdloop(%lb : index, %ub : index, %step : index) -> () { + // CHECK: omp.simdloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) + "omp.simdloop" (%lb, %ub, %step) ({ + ^bb0(%iv: index): + omp.yield + }) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} : + (index, index, index) -> () + + return +} + + +// CHECK-LABEL: omp_simdloop_pretty +func @omp_simdloop_pretty(%lb : index, %ub : index, %step : index) -> () { + // CHECK: omp.simdloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) + omp.simdloop (%iv) : index = (%lb) to (%ub) step (%step) { + omp.yield + } + return +} + +// CHECK-LABEL: omp_simdloop_pretty_multiple +func @omp_simdloop_pretty_multiple(%lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> () { + // CHECK: omp.simdloop (%{{.*}}, %{{.*}}) : index = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) + omp.simdloop (%iv1, %iv2) : index = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + omp.yield + } + return +} + // CHECK-LABEL: omp_target func @omp_target(%if_cond : i1, %device : si32, %num_threads : si32) -> () { diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 3b707b1..b29ba0d 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -663,6 +663,48 @@ llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () // ----- +// CHECK-LABEL: @simdloop_simple +llvm.func @simdloop_simple(%lb 
: i64, %ub : i64, %step : i64, %arg0: !llvm.ptr<f32>) { + "omp.simdloop" (%lb, %ub, %step) ({ + ^bb0(%iv: i64): + %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 + // The form of the emitted IR is controlled by OpenMPIRBuilder and + // tested there. Just check that the right metadata is added. + // CHECK: llvm.access.group + %4 = llvm.getelementptr %arg0[%iv] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32> + llvm.store %3, %4 : !llvm.ptr<f32> + omp.yield + }) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} : + (i64, i64, i64) -> () + + llvm.return +} +// CHECK: llvm.loop.parallel_accesses +// CHECK-NEXT: llvm.loop.vectorize.enable + +// ----- + +// CHECK-LABEL: @simdloop_simple_multiple +llvm.func @simdloop_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr<f32>, %arg1: !llvm.ptr<f32>) { + omp.simdloop (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 + // The form of the emitted IR is controlled by OpenMPIRBuilder and + // tested there. Just check that the right metadata is added. + // CHECK: llvm.access.group + // CHECK-NEXT: llvm.access.group + %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32> + %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32> + llvm.store %3, %4 : !llvm.ptr<f32> + llvm.store %3, %5 : !llvm.ptr<f32> + omp.yield + } + llvm.return +} +// CHECK: llvm.loop.parallel_accesses +// CHECK-NEXT: llvm.loop.vectorize.enable + +// ----- + omp.critical.declare @mutex hint(contended) // CHECK-LABEL: @omp_critical -- 2.7.4