From 77124386feb615343afef2740396de1baceb1336 Mon Sep 17 00:00:00 2001
From: Matthias Springer
Date: Fri, 28 Apr 2023 10:34:03 +0900
Subject: [PATCH] [mlir][tensor] Add transform to make tensor.pad loop-independent

Add a transform to make `tensor.pad` and `tensor.empty` ops independent
of SCF loop IVs. Such ops can then be hoisted. E.g.:

```
scf.for %iv = %lb to %ub step %step {
  %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%iv)[%ub]
  %p = tensor.pad %t low[5] high[%high] ...
  ...
}
```

Is transformed to:

```
%high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
%p_hoistable = tensor.pad %t low[5] high[%high_new]
%dim = tensor.dim %t, %c0
%size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>(%iv)[%ub, %dim]
%slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
```

Differential Revision: https://reviews.llvm.org/D143910
---
 .../mlir/Dialect/Affine/Transforms/Transforms.h    |  14 ++
 mlir/include/mlir/Dialect/Tensor/CMakeLists.txt    |   1 +
 .../Dialect/Tensor/TransformOps/CMakeLists.txt     |   6 +
 .../Tensor/TransformOps/TensorTransformOps.h       |   8 ++
 .../Tensor/TransformOps/TensorTransformOps.td      |  64 +++++++++
 .../mlir/Dialect/Tensor/Transforms/Transforms.h    |  39 ++++++
 mlir/include/mlir/InitAllDialects.h                |   2 +
 .../mlir/Interfaces/ValueBoundsOpInterface.h       |  25 +++-
 .../Dialect/Affine/Transforms/ReifyValueBounds.cpp |   9 +-
 .../lib/Dialect/Tensor/TransformOps/CMakeLists.txt |   6 +
 .../Tensor/TransformOps/TensorTransformOps.cpp     |  81 +++++++++++
 mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt  |   3 +
 .../Tensor/Transforms/IndependenceTransforms.cpp   | 136 +++++++++++++++++++
 mlir/lib/Interfaces/ValueBoundsOpInterface.cpp     |  36 ++++-
 .../Tensor/transform-op-make-loop-independent.mlir | 151 +++++++++++++++++++++
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel  |  38 ++++++
 16 files changed, 611 insertions(+), 8 deletions(-)
 create mode 100644 mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt
 create mode 100644 mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
 create mode 100644 mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp
 create mode 100644 mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir

diff --git a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
index 02938d1..8e840e7 100644
--- a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
@@ -15,9 +15,11 @@
 #define MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
 
 #include "mlir/Interfaces/ValueBoundsOpInterface.h"
+#include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
 
 namespace mlir {
+class AffineMap;
 class Location;
 class OpBuilder;
 class OpFoldResult;
@@ -85,6 +87,18 @@ FailureOr<OpFoldResult> reifyShapedValueDimBound(
     ValueBoundsConstraintSet::StopConditionFn stopCondition = nullptr,
     bool closedUB = false);
 
+/// Materialize an already computed bound with Affine dialect ops.
+///
+/// * `ValueBoundsOpInterface::computeBound` computes bounds but does not
+///   create IR. It is dialect independent.
+/// * `materializeComputedBound` materializes computed bounds with Affine
+///   dialect ops.
+/// * `reifyIndexValueBound`/`reifyShapedValueDimBound` are a combination of
+///   the two functions mentioned above.
+OpFoldResult materializeComputedBound(
+    OpBuilder &b, Location loc, AffineMap boundMap,
+    ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands);
+
 } // namespace affine
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
index 9f57627..cb1e9d0 100644
--- a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory(IR)
 add_subdirectory(Transforms)
+add_subdirectory(TransformOps)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt
new file mode 100644
index 0000000..bb9f703
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(LLVM_TARGET_DEFINITIONS TensorTransformOps.td)
+mlir_tablegen(TensorTransformOps.h.inc -gen-op-decls)
+mlir_tablegen(TensorTransformOps.cpp.inc -gen-op-defs)
+add_public_tablegen_target(MLIRTensorTransformOpsIncGen)
+
+add_mlir_doc(TensorTransformOps TensorTransformOps Dialects/ -gen-op-doc)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
index d1b14d2..c735700 100644
--- a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
@@ -11,10 +11,13 @@
 
 #include "mlir/Dialect/PDL/IR/PDLTypes.h"
 #include "mlir/Dialect/Transform/IR/TransformOps.h"
+#include "mlir/Dialect/Transform/IR/TransformTypes.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/PatternMatch.h"
 
 namespace mlir {
+class DialectRegistry;
+
 namespace tensor {
 
 /// A specialized TrackingListener for transform ops that operate on tensor IR.
@@ -29,7 +32,12 @@ protected:
                                   ValueRange newValues) const override;
 };
 
+void registerTransformDialectExtension(DialectRegistry &registry);
+
 } // namespace tensor
 } // namespace mlir
 
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc"
+
 #endif // MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
new file mode 100644
index 0000000..42be882
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
@@ -0,0 +1,64 @@
+//===- TensorTransformOps.td - Tensor transformation ops --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TENSOR_TRANSFORM_OPS
+#define TENSOR_TRANSFORM_OPS
+
+include "mlir/Dialect/PDL/IR/PDLTypes.td"
+include "mlir/Dialect/Transform/IR/TransformDialect.td"
+include "mlir/Dialect/Transform/IR/TransformInterfaces.td"
+include "mlir/Dialect/Transform/IR/TransformTypes.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/OpBase.td"
+
+def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
+
+def MakeLoopIndependentOp
+    : Op<Transform_Dialect, "tensor.make_loop_independent",
+         [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+          TransformOpInterface, TransformEachOpTrait]> {
+  let description = [{
+    Rewrite the targeted ops such that their index-typed operands no longer
+    depend on any loop induction variable of the `num_loops` enclosing
+    `scf.for` loops.
+    I.e., compute an upper bound that is independent of any such loop IV for
+    every tensor dimension. The transformed op could then be hoisted from the
+    `num_loops` enclosing loops. To preserve the original semantics, place a
+    `tensor.extract_slice` inside the loop.
+
+    Currently supported operations are:
+    - tensor.empty: Replaced with a new tensor.empty with upper bound sizes,
+      followed by a tensor.extract_slice.
+    - tensor.pad: Replaced by an upper bound padding, followed by a
+      tensor.extract_slice.
+
+    #### Return modes
+
+    This operation fails if at least one induction variable could not be
+    eliminated. In case the targeted op is already independent of induction
+    variables, this transform succeeds and returns the unmodified target op.
+
+    Otherwise, the returned handle points to a subset of the produced ops:
+    - tensor.empty: The returned handle points to the tensor.extract_slice op.
+    - tensor.pad: The returned handle points to the tensor.extract_slice op.
+
+    This transform op consumes the target handle and produces a result handle.
+  }];
+
+  let arguments = (ins PDL_Operation:$target, I64Attr:$num_loops);
+  let results = (outs PDL_Operation:$transformed);
+  let assemblyFormat = "$target attr-dict";
+
+  let extraClassDeclaration = [{
+    ::mlir::DiagnosedSilenceableFailure applyToOne(
+        ::mlir::Operation *target,
+        ::mlir::transform::ApplyToEachResultList &results,
+        ::mlir::transform::TransformState &state);
+  }];
+}
+
+#endif // TENSOR_TRANSFORM_OPS
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
index a3b5abf..9922dc8 100644
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
@@ -61,6 +61,45 @@ void populateFoldTensorEmptyPatterns(RewritePatternSet &patterns);
 /// respectively.
 void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);
 
+//===----------------------------------------------------------------------===//
+// Transform helpers
+//===----------------------------------------------------------------------===//
+
+/// Build a new tensor::PadOp with low/high padding that is independent of all
+/// given independencies. If the op is already independent of all
+/// independencies, the same PadOp result is returned.
+///
+/// Failure indicates that no suitable upper bound for the low/high padding
+/// could be found.
+///
+/// Example:
+/// scf.for %iv = %lb to %ub step %step {
+///   %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%iv)[%ub]
+///   %p = tensor.pad %t low[5] high[%high] ...
+///   ...
+/// }
+///
+/// The function builds IR such as:
+/// %high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
+/// %p_hoistable = tensor.pad %t low[5] high[%high_new]
+/// %dim = tensor.dim %t, %c0
+/// %size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+///     (%iv)[%ub, %dim]
+/// %slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
+///
+/// The slice is returned.
+FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                                    ValueRange independencies);
+
+/// Build a new tensor::EmptyOp whose dynamic sizes are independent of all
+/// given independencies. If the op is already independent of all
+/// independencies, the same EmptyOp result is returned.
+///
+/// Failure indicates that no suitable upper bound for the dynamic sizes could
+/// be found.
+FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::EmptyOp emptyOp,
+                                    ValueRange independencies);
+
 } // namespace tensor
 } // namespace mlir
 
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index 6f78bab..560b683 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -71,6 +71,7 @@
 #include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/IR/ValueBoundsOpInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
 #include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Transform/IR/TransformDialect.h"
@@ -132,6 +133,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
   linalg::registerTransformDialectExtension(registry);
   memref::registerTransformDialectExtension(registry);
   scf::registerTransformDialectExtension(registry);
+  tensor::registerTransformDialectExtension(registry);
   vector::registerTransformDialectExtension(registry);
 
   // Register all external models.
diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
index b4d070a..ac71b73 100644
--- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
+++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
@@ -114,12 +114,25 @@ public:
   /// Compute a bound in terms of the values/dimensions in `dependencies`. The
   /// computed bound consists of only constant terms and dependent values (or
   /// dimension sizes thereof).
-  static LogicalResult computeBound(AffineMap &resultMap,
-                                    ValueDimList &mapOperands,
-                                    presburger::BoundType type, Value value,
-                                    std::optional<int64_t> dim,
-                                    ValueDimList dependencies,
-                                    bool closedUB = false);
+  static LogicalResult
+  computeDependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
+                        presburger::BoundType type, Value value,
+                        std::optional<int64_t> dim, ValueDimList dependencies,
+                        bool closedUB = false);
+
+  /// Compute a bound that is independent of all values in `independencies`.
+  ///
+  /// Independencies are the opposite of dependencies. The computed bound does
+  /// not contain any SSA values that are part of `independencies`. E.g., this
+  /// function can be used to make ops hoistable from loops. To that end, ops
+  /// must be made independent of loop induction variables (in the case of
+  /// "for" loops). Loop induction variables are the independencies; they may
+  /// not appear in the computed bound.
+  static LogicalResult
+  computeIndependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
+                          presburger::BoundType type, Value value,
+                          std::optional<int64_t> dim,
+                          ValueRange independencies, bool closedUB = false);
 
   /// Compute a constant bound for the given index-typed value or shape
   /// dimension size.
diff --git a/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp b/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
index 0efe31c..4990229 100644
--- a/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
@@ -19,7 +19,7 @@ using namespace mlir::affine;
 static FailureOr<OpFoldResult>
 reifyValueBound(OpBuilder &b, Location loc, presburger::BoundType type,
                 Value value, std::optional<int64_t> dim,
-                function_ref<bool(Value, std::optional<int64_t>)> stopCondition,
+                ValueBoundsConstraintSet::StopConditionFn stopCondition,
                 bool closedUB) {
   // Compute bound.
   AffineMap boundMap;
@@ -28,6 +28,13 @@ reifyValueBound(OpBuilder &b, Location loc, presburger::BoundType type,
           boundMap, mapOperands, type, value, dim, stopCondition, closedUB)))
     return failure();
 
+  // Reify bound.
+  return affine::materializeComputedBound(b, loc, boundMap, mapOperands);
+}
+
+OpFoldResult affine::materializeComputedBound(
+    OpBuilder &b, Location loc, AffineMap boundMap,
+    ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands) {
   // Materialize tensor.dim/memref.dim ops.
   SmallVector<Value> operands;
   for (auto valueDim : mapOperands) {
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
index e8be901..be1a5dd 100644
--- a/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
@@ -4,9 +4,15 @@ add_mlir_dialect_library(MLIRTensorTransformOps
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor/TransformOps
 
+  DEPENDS
+  MLIRTensorTransformOpsIncGen
+
   LINK_LIBS PUBLIC
+  MLIRAffineDialect
   MLIRIR
   MLIRPDLDialect
+  MLIRSCFDialect
   MLIRTensorDialect
+  MLIRTensorTransforms
   MLIRTransformDialect
 )
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
index 01e1a35..4394465 100644
--- a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
+++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
@@ -8,8 +8,12 @@
 
 #include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
 #include "mlir/Dialect/Transform/IR/TransformDialect.h"
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
 #include "llvm/ADT/TypeSwitch.h"
 
 using namespace mlir;
@@ -49,3 +53,80 @@ tensor::TrackingListener::findReplacementOp(Operation *op,
 
   return nullptr;
 }
+
+//===----------------------------------------------------------------------===//
+// MakeLoopIndependentOp
+//===----------------------------------------------------------------------===//
+
+DiagnosedSilenceableFailure transform::MakeLoopIndependentOp::applyToOne(
+    Operation *target, transform::ApplyToEachResultList &results,
+    transform::TransformState &state) {
+  // Gather IVs.
+  SmallVector<Value> ivs;
+  Operation *nextOp = target;
+  for (uint64_t i = 0, e = getNumLoops(); i < e; ++i) {
+    nextOp = nextOp->getParentOfType<scf::ForOp>();
+    if (!nextOp) {
+      DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                         << "could not find " << i
+                                         << "-th enclosing loop";
+      diag.attachNote(target->getLoc()) << "target op";
+      return diag;
+    }
+    ivs.push_back(cast<scf::ForOp>(nextOp).getInductionVar());
+  }
+
+  // Rewrite IR.
+  IRRewriter rewriter(target->getContext());
+  FailureOr<Value> replacement = failure();
+  if (auto padOp = dyn_cast<tensor::PadOp>(target)) {
+    replacement = tensor::buildIndependentOp(rewriter, padOp, ivs);
+  } else if (auto emptyOp = dyn_cast<tensor::EmptyOp>(target)) {
+    replacement = tensor::buildIndependentOp(rewriter, emptyOp, ivs);
+  } else {
+    DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                       << "unsupported target op";
+    diag.attachNote(target->getLoc()) << "target op";
+    return diag;
+  }
+  if (failed(replacement)) {
+    DiagnosedSilenceableFailure diag =
+        emitSilenceableError() << "could not make target op loop-independent";
+    diag.attachNote(target->getLoc()) << "target op";
+    return diag;
+  }
+  rewriter.replaceOp(target, *replacement);
+  results.push_back(replacement->getDefiningOp());
+  return DiagnosedSilenceableFailure::success();
+}
+
+//===----------------------------------------------------------------------===//
+// Transform op registration
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TensorTransformDialectExtension
+    : public transform::TransformDialectExtension<
+          TensorTransformDialectExtension> {
+public:
+  using Base::Base;
+
+  void init() {
+    declareGeneratedDialect<affine::AffineDialect>();
+    declareGeneratedDialect<scf::SCFDialect>();
+
+    registerTransformOps<
+#define GET_OP_LIST
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+        >();
+  }
+};
+} // namespace
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+
+void mlir::tensor::registerTransformDialectExtension(
+    DialectRegistry &registry) {
+  registry.addExtensions<TensorTransformDialectExtension>();
+}
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
index 4457954..c41e9e9 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
@@ -5,6 +5,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
   ExtractSliceFromReshapeUtils.cpp
   FoldIntoPackAndUnpackPatterns.cpp
   FoldTensorSubsetOps.cpp
+  IndependenceTransforms.cpp
   MergeConsecutiveInsertExtractSlicePatterns.cpp
   ReshapePatterns.cpp
   SwapExtractSliceWithProducerPatterns.cpp
@@ -17,6 +18,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
 
   LINK_LIBS PUBLIC
   MLIRAffineDialect
+  MLIRAffineTransforms
  MLIRAffineUtils
   MLIRArithDialect
   MLIRBufferizationDialect
@@ -30,4 +32,5 @@ add_mlir_dialect_library(MLIRTensorTransforms
   MLIRTilingInterface
   MLIRTransforms
   MLIRVectorDialect
+  MLIRValueBoundsOpInterface
 )
diff --git a/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp b/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp
new file mode 100644
index 0000000..7217308
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp
@@ -0,0 +1,136 @@
+//===- IndependenceTransforms.cpp - Make ops independent of values -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Transforms/Transforms.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
+
+using namespace mlir;
+using namespace mlir::tensor;
+
+/// Make the given OpFoldResult independent of all independencies.
+static FailureOr<OpFoldResult> makeIndependent(OpBuilder &b, Location loc,
+                                               OpFoldResult ofr,
+                                               ValueRange independencies) {
+  if (ofr.is<Attribute>())
+    return ofr;
+  Value value = ofr.get<Value>();
+  AffineMap boundMap;
+  ValueDimList mapOperands;
+  if (failed(ValueBoundsConstraintSet::computeIndependentBound(
+          boundMap, mapOperands, presburger::BoundType::UB, value,
+          /*dim=*/std::nullopt, independencies, /*closedUB=*/true)))
+    return failure();
+  return mlir::affine::materializeComputedBound(b, loc, boundMap, mapOperands);
+}
+
+FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                                            ValueRange independencies) {
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(padOp);
+  Location loc = padOp.getLoc();
+
+  // Non-constant padding not supported.
+  Value constantPadding = padOp.getConstantPaddingValue();
+  if (!constantPadding)
+    return failure();
+
+  SmallVector<OpFoldResult> newMixedLow, newMixedHigh;
+  for (OpFoldResult ofr : padOp.getMixedLowPad()) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newMixedLow.push_back(*ub);
+  }
+  for (OpFoldResult ofr : padOp.getMixedHighPad()) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newMixedHigh.push_back(*ub);
+  }
+
+  // Return existing tensor::PadOp if nothing has changed.
+  if (llvm::equal(padOp.getMixedLowPad(), newMixedLow) &&
+      llvm::equal(padOp.getMixedHighPad(), newMixedHigh))
+    return padOp.getResult();
+
+  // Create a new tensor::PadOp.
+  auto newPadOp = b.create<PadOp>(
+      loc, padOp.getResultType(), padOp.getSource(), newMixedLow, newMixedHigh,
+      constantPadding, padOp.getNofold(), /*attrs=*/ArrayRef<NamedAttribute>{});
+
+  // Create a tensor::ExtractSliceOp.
+  // Reify the result sizes of the old tensor::PadOp.
+  ReifiedRankedShapedTypeDims reifiedSizes;
+  ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+      dyn_cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
+  if (failed(reifyShapedTypeInterface.reifyResultShapes(b, reifiedSizes)))
+    return failure();
+
+  SmallVector<OpFoldResult> offsets, sizes, strides;
+  for (int64_t i = 0, e = padOp.getResultType().getRank(); i < e; ++i) {
+    // offset = ub(low_padding) - low_padding
+    OpFoldResult prevLow = padOp.getMixedLowPad()[i];
+    if (prevLow.is<Attribute>()) {
+      offsets.push_back(b.getIndexAttr(0));
+    } else {
+      offsets.push_back(
+          b.create<affine::AffineApplyOp>(
+               loc, b.getAffineDimExpr(0) - b.getAffineDimExpr(1),
+               std::initializer_list<Value>{newMixedLow[i].get<Value>(),
+                                            prevLow.get<Value>()})
+              .getResult());
+    }
+    // size = reified result size
+    if (!padOp.getResultType().isDynamicDim(i)) {
+      sizes.push_back(b.getIndexAttr(padOp.getResultType().getDimSize(i)));
+    } else {
+      sizes.push_back(reifiedSizes[0][i]);
+    }
+    // stride = 1
+    strides.push_back(b.getIndexAttr(1));
+  }
+
+  return b.create<ExtractSliceOp>(loc, newPadOp, offsets, sizes, strides)
+      .getResult();
+}
+
+FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b,
+                                            tensor::EmptyOp emptyOp,
+                                            ValueRange independencies) {
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(emptyOp);
+  Location loc = emptyOp.getLoc();
+
+  SmallVector<OpFoldResult> newSizes;
+  for (OpFoldResult ofr : emptyOp.getMixedSizes()) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newSizes.push_back(*ub);
+  }
+
+  // Return existing tensor::EmptyOp if nothing has changed.
+  if (llvm::equal(emptyOp.getMixedSizes(), newSizes))
+    return emptyOp.getResult();
+
+  // Create a new tensor::EmptyOp.
+  Value newEmptyOp =
+      b.create<EmptyOp>(loc, newSizes, emptyOp.getType().getElementType());
+
+  // Create a tensor::ExtractSliceOp.
+  SmallVector<OpFoldResult> offsets(newSizes.size(), b.getIndexAttr(0));
+  SmallVector<OpFoldResult> strides(newSizes.size(), b.getIndexAttr(1));
+  return b
+      .create<ExtractSliceOp>(loc, newEmptyOp, offsets,
+                              emptyOp.getMixedSizes(), strides)
+      .getResult();
+}
diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
index 597bc7d..95fb785 100644
--- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
+++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
@@ -356,7 +356,7 @@ LogicalResult ValueBoundsConstraintSet::computeBound(
   return success();
 }
 
-LogicalResult ValueBoundsConstraintSet::computeBound(
+LogicalResult ValueBoundsConstraintSet::computeDependentBound(
     AffineMap &resultMap, ValueDimList &mapOperands,
     presburger::BoundType type, Value value, std::optional<int64_t> dim,
     ValueDimList dependencies, bool closedUB) {
@@ -368,6 +368,40 @@ LogicalResult ValueBoundsConstraintSet::computeBound(
                       closedUB);
 }
 
+LogicalResult ValueBoundsConstraintSet::computeIndependentBound(
+    AffineMap &resultMap, ValueDimList &mapOperands,
+    presburger::BoundType type, Value value, std::optional<int64_t> dim,
+    ValueRange independencies, bool closedUB) {
+  // Return "true" if the given value is independent of all values in
+  // `independencies`. I.e., neither the value itself nor any value in the
+  // backward slice (reverse use-def chain) is contained in `independencies`.
+  auto isIndependent = [&](Value v) {
+    SmallVector<Value> worklist;
+    DenseSet<Value> visited;
+    worklist.push_back(v);
+    while (!worklist.empty()) {
+      Value next = worklist.pop_back_val();
+      if (visited.contains(next))
+        continue;
+      visited.insert(next);
+      if (llvm::is_contained(independencies, next))
+        return false;
+      // TODO: DominanceInfo could be used to stop the traversal early.
+      Operation *op = next.getDefiningOp();
+      if (!op)
+        continue;
+      worklist.append(op->getOperands().begin(), op->getOperands().end());
+    }
+    return true;
+  };
+
+  // Reify bounds in terms of any independent values.
+  return computeBound(
+      resultMap, mapOperands, type, value, dim,
+      [&](Value v, std::optional<int64_t> d) { return isIndependent(v); },
+      closedUB);
+}
+
 FailureOr<int64_t> ValueBoundsConstraintSet::computeConstantBound(
     presburger::BoundType type, Value value, std::optional<int64_t> dim,
     StopConditionFn stopCondition, bool closedUB) {
diff --git a/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
new file mode 100644
index 0000000..18a99c5
--- /dev/null
+++ b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
@@ -0,0 +1,151 @@
+// RUN: mlir-opt %s -allow-unregistered-dialect \
+// RUN:     -test-transform-dialect-interpreter -canonicalize \
+// RUN:     -split-input-file -verify-diagnostics | FileCheck %s
+
+// This is a test case where "high" padding depends on the IV.
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK-LABEL: func @make_pad_loop_independent_1(
+// CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+// CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
+                                       %t: tensor<?xf32>, %f: f32) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[high:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
+    // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[5] high[%[[high]]]
+    // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+    // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][0] [%[[size]]] [1]
+    %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %p = tensor.pad %t low[5] high[%high] {
+    ^bb0(%arg1: index):
+      tensor.yield %f : f32
+    } : tensor<?xf32> to tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// This is a test case where "low" padding depends on the IV.
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK: #[[$map2:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
+// CHECK-LABEL: func @make_pad_loop_independent_1(
+// CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+// CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
+                                       %t: tensor<?xf32>, %f: f32) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[low:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
+    // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[%[[low]]] high[5]
+    // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+    // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+    // CHECK: %[[offset:.*]] = affine.apply #[[$map2]](%[[iv]])[%[[lb]]]
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][%[[offset]]] [%[[size]]] [1]
+    %low = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %p = tensor.pad %t low[%low] high[5] {
+    ^bb0(%arg1: index):
+      tensor.yield %f : f32
+    } : tensor<?xf32> to tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 * 2 - 2)>
+// CHECK-LABEL: func @two_loops(
+func.func @two_loops(%lb: index, %ub: index, %step: index,
+                     %t: tensor<?xf32>, %f: f32) {
+  scf.for %i = %lb to %ub step %step {
+    scf.for %j = %lb to %ub step %step {
+      // CHECK: affine.apply #map()[%{{.*}}]
+      %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+      %p = tensor.pad %t low[%low] high[5] {
+      ^bb0(%arg1: index):
+        tensor.yield %f : f32
+      } : tensor<?xf32> to tensor<?xf32>
+      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+    }
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 2}
+}
+
+// -----
+
+func.func @not_enough_loops(%lb: index, %ub: index, %step: index,
+                            %t: tensor<?xf32>, %f: f32) {
+  scf.for %i = %lb to %ub step %step {
+    scf.for %j = %lb to %ub step %step {
+      %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+      // expected-note@below {{target op}}
+      %p = tensor.pad %t low[%low] high[5] {
+      ^bb0(%arg1: index):
+        tensor.yield %f : f32
+      } : tensor<?xf32> to tensor<?xf32>
+      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+    }
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  // expected-error@below {{could not find 2-th enclosing loop}}
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 3}
+}
+
+// -----
+
+// CHECK: #[[$map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
+// CHECK: #[[$map1:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+// CHECK-LABEL: func @make_empty_loop_independent(
+// CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index)
+func.func @make_empty_loop_independent(%lb: index, %ub: index, %step: index) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[slice_sz:.*]] = affine.apply #[[$map]](%[[iv]])[%[[ub]]]
+    // CHECK: %[[empty_sz:.*]] = affine.apply #[[$map1]]()[%[[ub]], %[[lb]]]
+    // CHECK: %[[empty:.*]] = tensor.empty(%[[empty_sz]]) : tensor<?xf32>
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[empty]][0] [%[[slice_sz]]] [1]
+    %sz = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %empty = tensor.empty(%sz) : tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%empty) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.empty"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index c941fac..dfa8f43 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5808,6 +5808,7 @@ cc_library(
     includes = ["include"],
     deps = [
         ":AffineDialect",
+        ":AffineTransforms",
         ":AffineUtils",
         ":ArithDialect",
         ":ArithUtils",
@@ -5824,20 +5825,57 @@ cc_library(
         ":TensorPassIncGen",
         ":TilingInterface",
        ":Transforms",
+        ":ValueBoundsOpInterface",
         ":VectorDialect",
         "//llvm:Support",
     ],
 )
 
+td_library(
+    name = "TensorTransformOpsTdFiles",
+    srcs = [
+        "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+    ],
+    includes = ["include"],
+    deps = [
+        ":PDLDialect",
+        ":TransformDialectTdFiles",
+    ],
+)
+
+gentbl_cc_library(
+    name = "TensorTransformOpsIncGen",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-op-decls"],
+            "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc",
+        ),
+        (
+            ["-gen-op-defs"],
+            "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc",
+        ),
+    ],
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+    deps = [
+        ":TensorTransformOpsTdFiles",
+    ],
+)
+
 cc_library(
     name = "TensorTransformOps",
     srcs = glob(["lib/Dialect/Tensor/TransformOps/*.cpp"]),
     hdrs = glob(["include/mlir/Dialect/Tensor/TransformOps/*.h"]),
     includes = ["include"],
     deps = [
+        ":AffineDialect",
         ":IR",
         ":PDLDialect",
+        ":SCFDialect",
         ":TensorDialect",
+        ":TensorTransformOpsIncGen",
+        ":TensorTransforms",
         ":TransformDialect",
         "//llvm:Support",
     ],
-- 
2.7.4
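
Note: for downstream C++ users, the two new entry points added by this patch compose the same way as the `makeIndependent` helper in IndependenceTransforms.cpp above. A minimal sketch of that composition (the wrapper name and calling context are hypothetical, not part of the patch):

```cpp
#include "mlir/Dialect/Affine/Transforms/Transforms.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"

using namespace mlir;

// Hypothetical helper: compute an upper bound for an index-typed `value`
// that references none of the given induction variables, then materialize
// it with Affine dialect ops. Mirrors the `makeIndependent` helper
// introduced in IndependenceTransforms.cpp.
static FailureOr<OpFoldResult>
makeValueLoopIndependent(OpBuilder &b, Location loc, Value value,
                         ValueRange inductionVars) {
  AffineMap boundMap;
  ValueDimList mapOperands;
  // Closed upper bound that must be independent of all `inductionVars`.
  if (failed(ValueBoundsConstraintSet::computeIndependentBound(
          boundMap, mapOperands, presburger::BoundType::UB, value,
          /*dim=*/std::nullopt, inductionVars, /*closedUB=*/true)))
    return failure();
  // Materialize the bound as affine.apply (plus tensor.dim/memref.dim ops
  // for any shaped-value operands of the bound map).
  return affine::materializeComputedBound(b, loc, boundMap, mapOperands);
}
```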