[mlir][tensor] Add transform to make tensor.pad loop-independent
author    Matthias Springer <springerm@google.com>
          Fri, 28 Apr 2023 01:34:03 +0000 (10:34 +0900)
committer Matthias Springer <springerm@google.com>
          Fri, 28 Apr 2023 02:46:32 +0000 (11:46 +0900)
Add a transform to make `tensor.pad` and `tensor.empty` ops independent of SCF loop IVs. Such ops can then be hoisted.

E.g.:
```
scf.for %iv = %lb to %ub step %step {
  %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%iv)[%ub]
  %p = tensor.pad %t low[5] high[%high] ...
  ...
}
```
Is transformed to:
```
%high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
%p_hoistable = tensor.pad %t low[5] high[%high_new]
%dim = tensor.dim %t, %c0
%size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>(%iv)[%ub, %dim]
%slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
```
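
For reference, the new transform op is `transform.tensor.make_loop_independent`. A minimal transform script (taken from the new test file) that applies it to every matched `tensor.pad` op with one enclosing loop:
```
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
}
```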

Differential Revision: https://reviews.llvm.org/D143910

16 files changed:
mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt [new file with mode: 0644]
mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td [new file with mode: 0644]
mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
mlir/include/mlir/InitAllDialects.h
mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp [new file with mode: 0644]
mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir [new file with mode: 0644]
utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

diff --git a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
index 02938d1..8e840e7 100644 (file)
 #define MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
 
 #include "mlir/Interfaces/ValueBoundsOpInterface.h"
+#include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
 
 namespace mlir {
+class AffineMap;
 class Location;
 class OpBuilder;
 class OpFoldResult;
@@ -85,6 +87,18 @@ FailureOr<OpFoldResult> reifyShapedValueDimBound(
     ValueBoundsConstraintSet::StopConditionFn stopCondition = nullptr,
     bool closedUB = false);
 
+/// Materialize an already computed bound with Affine dialect ops.
+///
+/// * `ValueBoundsOpInterface::computeBound` computes bounds but does not
+///   create IR. It is dialect independent.
+/// * `materializeComputedBound` materializes computed bounds with Affine
+///   dialect ops.
+/// * `reifyIndexValueBound`/`reifyShapedValueDimBound` are a combination of
+///   the two functions mentioned above.
+OpFoldResult materializeComputedBound(
+    OpBuilder &b, Location loc, AffineMap boundMap,
+    ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands);
+
 } // namespace affine
 } // namespace mlir
 
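As a usage sketch (not part of this patch; `b`, `loc`, `value`, and `otherValue` are assumed placeholders, with `using namespace mlir`), the two steps compose as follows:
```
// Step 1: Compute an upper bound on `value` purely in terms of `otherValue`.
// This step is dialect independent and creates no IR.
AffineMap boundMap;
ValueDimList mapOperands;
if (failed(ValueBoundsConstraintSet::computeDependentBound(
        boundMap, mapOperands, presburger::BoundType::UB, value,
        /*dim=*/std::nullopt, /*dependencies=*/{{otherValue, std::nullopt}},
        /*closedUB=*/true)))
  return failure();
// Step 2: Materialize the bound with Affine dialect ops (plus
// tensor.dim/memref.dim ops for shaped map operands).
OpFoldResult bound =
    affine::materializeComputedBound(b, loc, boundMap, mapOperands);
```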
diff --git a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
index 9f57627..cb1e9d0 100644 (file)
@@ -1,2 +1,3 @@
 add_subdirectory(IR)
 add_subdirectory(Transforms)
+add_subdirectory(TransformOps)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt
new file mode 100644 (file)
index 0000000..bb9f703
--- /dev/null
@@ -0,0 +1,6 @@
+set(LLVM_TARGET_DEFINITIONS TensorTransformOps.td)
+mlir_tablegen(TensorTransformOps.h.inc -gen-op-decls)
+mlir_tablegen(TensorTransformOps.cpp.inc -gen-op-defs)
+add_public_tablegen_target(MLIRTensorTransformOpsIncGen)
+
+add_mlir_doc(TensorTransformOps TensorTransformOps Dialects/ -gen-op-doc)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
index d1b14d2..c735700 100644 (file)
 
 #include "mlir/Dialect/PDL/IR/PDLTypes.h"
 #include "mlir/Dialect/Transform/IR/TransformOps.h"
+#include "mlir/Dialect/Transform/IR/TransformTypes.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/PatternMatch.h"
 
 namespace mlir {
+class DialectRegistry;
+
 namespace tensor {
 
 /// A specialized TrackingListener for transform ops that operate on tensor IR.
@@ -29,7 +32,12 @@ protected:
                                ValueRange newValues) const override;
 };
 
+void registerTransformDialectExtension(DialectRegistry &registry);
+
 } // namespace tensor
 } // namespace mlir
 
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc"
+
 #endif // MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
new file mode 100644 (file)
index 0000000..42be882
--- /dev/null
@@ -0,0 +1,64 @@
+//===- TensorTransformOps.td - Tensor transformation ops ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TENSOR_TRANSFORM_OPS
+#define TENSOR_TRANSFORM_OPS
+
+include "mlir/Dialect/PDL/IR/PDLTypes.td"
+include "mlir/Dialect/Transform/IR/TransformDialect.td"
+include "mlir/Dialect/Transform/IR/TransformInterfaces.td"
+include "mlir/Dialect/Transform/IR/TransformTypes.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/OpBase.td"
+
+def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
+
+def MakeLoopIndependentOp
+    : Op<Transform_Dialect, "tensor.make_loop_independent",
+         [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+          TransformOpInterface, TransformEachOpTrait]> {
+  let description = [{
+    Rewrite the targeted ops such that their index-typed operands no longer
+    depend on any loop induction variable of the `num_loops` enclosing
+    `scf.for` loops. I.e., compute an upper bound that is independent of any
+    such loop IV for every tensor dimension. The transformed op can then be
+    hoisted out of the `num_loops` enclosing loops. To preserve the original
+    semantics, a `tensor.extract_slice` is placed inside the loop.
+
+    Currently supported operations are:
+    - tensor.empty: Replaced with a new tensor.empty with upper bound sizes,
+      followed by a tensor.extract_slice.
+    - tensor.pad: Replaced by an upper bound padding, followed by a
+      tensor.extract_slice.
+
+    #### Return modes
+
+    This operation fails if at least one induction variable could not be
+    eliminated. In case the targeted op is already independent of induction
+    variables, this transform succeeds and returns the unmodified target op.
+
+    Otherwise, the returned handle points to a subset of the produced ops:
+    - tensor.empty: The returned handle points to the tensor.extract_slice op.
+    - tensor.pad: The returned handle points to the tensor.extract_slice op.
+
+    This transform op consumes the target handle and produces a result handle.
+  }];
+
+  let arguments = (ins PDL_Operation:$target, I64Attr:$num_loops);
+  let results = (outs PDL_Operation:$transformed);
+  let assemblyFormat = "$target attr-dict";
+
+  let extraClassDeclaration = [{
+    ::mlir::DiagnosedSilenceableFailure applyToOne(
+        ::mlir::Operation *target,
+        ::mlir::transform::ApplyToEachResultList &results,
+        ::mlir::transform::TransformState &state);
+  }];
+}
+
+#endif // TENSOR_TRANSFORM_OPS
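
The op also accepts `tensor.empty` targets. As exercised in the new test file, the corresponding transform script differs only in the matched op name:
```
%0 = transform.structured.match ops{["tensor.empty"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
```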
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
index a3b5abf..9922dc8 100644 (file)
@@ -61,6 +61,45 @@ void populateFoldTensorEmptyPatterns(RewritePatternSet &patterns);
 /// respectively.
 void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);
 
+//===----------------------------------------------------------------------===//
+// Transform helpers
+//===----------------------------------------------------------------------===//
+
+/// Build a new tensor::PadOp with low/high padding that is independent of all
+/// given independencies. If the op is already independent of all
+/// independencies, the same PadOp result is returned.
+///
+/// Failure indicates that no suitable upper bound for the low/high padding
+/// could be found.
+///
+/// Example:
+/// scf.for %iv = %lb to %ub step %step {
+///   %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%iv)[%ub]
+///   %p = tensor.pad %t low[5] high[%high] ...
+///   ...
+/// }
+///
+/// The function builds IR such as:
+/// %high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
+/// %p_hoistable = tensor.pad %t low[5] high[%high_new]
+/// %dim = tensor.dim %t, %c0
+/// %size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+///     (%iv)[%ub, %dim]
+/// %slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
+///
+/// The slice is returned.
+FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                                    ValueRange independencies);
+
+/// Build a new tensor::EmptyOp whose dynamic sizes are independent of all
+/// given independencies. If the op is already independent of all
+/// independencies, the same EmptyOp result is returned.
+///
+/// Failure indicates that no suitable upper bound for the dynamic sizes
+/// could be found.
+FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::EmptyOp emptyOp,
+                                    ValueRange independencies);
+
 } // namespace tensor
 } // namespace mlir
 
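A hedged usage sketch for the `tensor.pad` overload, mirroring how the transform op in TensorTransformOps.cpp drives it (`padOp` and its enclosing `forOp` are assumed to exist):
```
IRRewriter rewriter(padOp->getContext());
// The loop IVs that the rewritten pad op must not depend on.
SmallVector<Value> independencies = {forOp.getInductionVar()};
FailureOr<Value> replacement =
    tensor::buildIndependentOp(rewriter, padOp, independencies);
if (succeeded(replacement))
  rewriter.replaceOp(padOp, *replacement); // *replacement is the slice.
```
The `tensor.empty` overload is driven the same way.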
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index 6f78bab..560b683 100644 (file)
@@ -71,6 +71,7 @@
 #include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/IR/ValueBoundsOpInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
 #include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Transform/IR/TransformDialect.h"
@@ -132,6 +133,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
   linalg::registerTransformDialectExtension(registry);
   memref::registerTransformDialectExtension(registry);
   scf::registerTransformDialectExtension(registry);
+  tensor::registerTransformDialectExtension(registry);
   vector::registerTransformDialectExtension(registry);
 
   // Register all external models.
diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
index b4d070a..ac71b73 100644 (file)
@@ -114,12 +114,25 @@ public:
   /// Compute a bound in terms of the values/dimensions in `dependencies`. The
   /// computed bound consists of only constant terms and dependent values (or
   /// dimension sizes thereof).
-  static LogicalResult computeBound(AffineMap &resultMap,
-                                    ValueDimList &mapOperands,
-                                    presburger::BoundType type, Value value,
-                                    std::optional<int64_t> dim,
-                                    ValueDimList dependencies,
-                                    bool closedUB = false);
+  static LogicalResult
+  computeDependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
+                        presburger::BoundType type, Value value,
+                        std::optional<int64_t> dim, ValueDimList dependencies,
+                        bool closedUB = false);
+
+  /// Compute a bound that is independent of all values in `independencies`.
+  ///
+  /// Independencies are the opposite of dependencies. The computed bound does
+  /// not contain any SSA values that are part of `independencies`. E.g., this
+  /// function can be used to make ops hoistable from loops. To that end, ops
+  /// must be made independent of loop induction variables (in the case of "for"
+  /// loops). Loop induction variables are the independencies; they may not
+  /// appear in the computed bound.
+  static LogicalResult
+  computeIndependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
+                          presburger::BoundType type, Value value,
+                          std::optional<int64_t> dim, ValueRange independencies,
+                          bool closedUB = false);
 
   /// Compute a constant bound for the given index-typed value or shape
   /// dimension size.
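
For the commit-message example, a sketch of how `computeIndependentBound` is driven (this mirrors the `makeIndependent` helper in IndependenceTransforms.cpp below; `high` and `ivs` are assumed placeholders):
```
AffineMap boundMap;   // E.g. ()[s0, s1] -> (-s0 + s1) over operands (%lb, %ub).
ValueDimList mapOperands;
if (failed(ValueBoundsConstraintSet::computeIndependentBound(
        boundMap, mapOperands, presburger::BoundType::UB, high,
        /*dim=*/std::nullopt, /*independencies=*/ivs, /*closedUB=*/true)))
  return failure(); // No IV-free upper bound could be found.
```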
diff --git a/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp b/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
index 0efe31c..4990229 100644 (file)
@@ -19,7 +19,7 @@ using namespace mlir::affine;
 static FailureOr<OpFoldResult>
 reifyValueBound(OpBuilder &b, Location loc, presburger::BoundType type,
                 Value value, std::optional<int64_t> dim,
-                function_ref<bool(Value, std::optional<int64_t>)> stopCondition,
+                ValueBoundsConstraintSet::StopConditionFn stopCondition,
                 bool closedUB) {
   // Compute bound.
   AffineMap boundMap;
@@ -28,6 +28,13 @@ reifyValueBound(OpBuilder &b, Location loc, presburger::BoundType type,
           boundMap, mapOperands, type, value, dim, stopCondition, closedUB)))
     return failure();
 
+  // Reify bound.
+  return affine::materializeComputedBound(b, loc, boundMap, mapOperands);
+}
+
+OpFoldResult affine::materializeComputedBound(
+    OpBuilder &b, Location loc, AffineMap boundMap,
+    ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands) {
   // Materialize tensor.dim/memref.dim ops.
   SmallVector<Value> operands;
   for (auto valueDim : mapOperands) {
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
index e8be901..be1a5dd 100644 (file)
@@ -4,9 +4,15 @@ add_mlir_dialect_library(MLIRTensorTransformOps
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor/TransformOps
 
+  DEPENDS
+  MLIRTensorTransformOpsIncGen
+
   LINK_LIBS PUBLIC
+  MLIRAffineDialect
   MLIRIR
   MLIRPDLDialect
+  MLIRSCFDialect
   MLIRTensorDialect
+  MLIRTensorTransforms
   MLIRTransformDialect
 )
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
index 01e1a35..4394465 100644 (file)
@@ -8,8 +8,12 @@
 
 #include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
 #include "mlir/Dialect/Transform/IR/TransformDialect.h"
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
 #include "llvm/ADT/TypeSwitch.h"
 
 using namespace mlir;
@@ -49,3 +53,80 @@ tensor::TrackingListener::findReplacementOp(Operation *op,
 
   return nullptr;
 }
+
+//===----------------------------------------------------------------------===//
+// MakeLoopIndependentOp
+//===----------------------------------------------------------------------===//
+
+DiagnosedSilenceableFailure transform::MakeLoopIndependentOp::applyToOne(
+    Operation *target, transform::ApplyToEachResultList &results,
+    transform::TransformState &state) {
+  // Gather IVs.
+  SmallVector<Value> ivs;
+  Operation *nextOp = target;
+  for (uint64_t i = 0, e = getNumLoops(); i < e; ++i) {
+    nextOp = nextOp->getParentOfType<scf::ForOp>();
+    if (!nextOp) {
+      DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                         << "could not find " << i
+                                         << "-th enclosing loop";
+      diag.attachNote(target->getLoc()) << "target op";
+      return diag;
+    }
+    ivs.push_back(cast<scf::ForOp>(nextOp).getInductionVar());
+  }
+
+  // Rewrite IR.
+  IRRewriter rewriter(target->getContext());
+  FailureOr<Value> replacement = failure();
+  if (auto padOp = dyn_cast<tensor::PadOp>(target)) {
+    replacement = tensor::buildIndependentOp(rewriter, padOp, ivs);
+  } else if (auto emptyOp = dyn_cast<tensor::EmptyOp>(target)) {
+    replacement = tensor::buildIndependentOp(rewriter, emptyOp, ivs);
+  } else {
+    DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                       << "unsupported target op";
+    diag.attachNote(target->getLoc()) << "target op";
+    return diag;
+  }
+  if (failed(replacement)) {
+    DiagnosedSilenceableFailure diag =
+        emitSilenceableError() << "could not make target op loop-independent";
+    diag.attachNote(target->getLoc()) << "target op";
+    return diag;
+  }
+  rewriter.replaceOp(target, *replacement);
+  results.push_back(replacement->getDefiningOp());
+  return DiagnosedSilenceableFailure::success();
+}
+
+//===----------------------------------------------------------------------===//
+// Transform op registration
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TensorTransformDialectExtension
+    : public transform::TransformDialectExtension<
+          TensorTransformDialectExtension> {
+public:
+  using Base::Base;
+
+  void init() {
+    declareGeneratedDialect<affine::AffineDialect>();
+    declareGeneratedDialect<tensor::TensorDialect>();
+
+    registerTransformOps<
+#define GET_OP_LIST
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+        >();
+  }
+};
+} // namespace
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+
+void mlir::tensor::registerTransformDialectExtension(
+    DialectRegistry &registry) {
+  registry.addExtensions<TensorTransformDialectExtension>();
+}
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
index 4457954..c41e9e9 100644 (file)
@@ -5,6 +5,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
   ExtractSliceFromReshapeUtils.cpp
   FoldIntoPackAndUnpackPatterns.cpp
   FoldTensorSubsetOps.cpp
+  IndependenceTransforms.cpp
   MergeConsecutiveInsertExtractSlicePatterns.cpp
   ReshapePatterns.cpp
   SwapExtractSliceWithProducerPatterns.cpp
@@ -17,6 +18,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
 
   LINK_LIBS PUBLIC
   MLIRAffineDialect
+  MLIRAffineTransforms
   MLIRAffineUtils
   MLIRArithDialect
   MLIRBufferizationDialect
@@ -30,4 +32,5 @@ add_mlir_dialect_library(MLIRTensorTransforms
   MLIRTilingInterface
   MLIRTransforms
   MLIRVectorDialect
+  MLIRValueBoundsOpInterface
 )
diff --git a/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp b/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp
new file mode 100644 (file)
index 0000000..7217308
--- /dev/null
@@ -0,0 +1,136 @@
+//===- IndependenceTransforms.cpp - Make ops independent of values --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Transforms/Transforms.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
+
+using namespace mlir;
+using namespace mlir::tensor;
+
+/// Make the given OpFoldResult independent of all independencies.
+static FailureOr<OpFoldResult> makeIndependent(OpBuilder &b, Location loc,
+                                               OpFoldResult ofr,
+                                               ValueRange independencies) {
+  if (ofr.is<Attribute>())
+    return ofr;
+  Value value = ofr.get<Value>();
+  AffineMap boundMap;
+  ValueDimList mapOperands;
+  if (failed(ValueBoundsConstraintSet::computeIndependentBound(
+          boundMap, mapOperands, presburger::BoundType::UB, value,
+          /*dim=*/std::nullopt, independencies, /*closedUB=*/true)))
+    return failure();
+  return mlir::affine::materializeComputedBound(b, loc, boundMap, mapOperands);
+}
+
+FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                                            ValueRange independencies) {
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(padOp);
+  Location loc = padOp.getLoc();
+
+  // Non-constant padding not supported.
+  Value constantPadding = padOp.getConstantPaddingValue();
+  if (!constantPadding)
+    return failure();
+
+  SmallVector<OpFoldResult> newMixedLow, newMixedHigh;
+  for (OpFoldResult ofr : padOp.getMixedLowPad()) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newMixedLow.push_back(*ub);
+  }
+  for (OpFoldResult ofr : padOp.getMixedHighPad()) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newMixedHigh.push_back(*ub);
+  }
+
+  // Return existing tensor::PadOp if nothing has changed.
+  if (llvm::equal(padOp.getMixedLowPad(), newMixedLow) &&
+      llvm::equal(padOp.getMixedHighPad(), newMixedHigh))
+    return padOp.getResult();
+
+  // Create a new tensor::PadOp.
+  auto newPadOp = b.create<PadOp>(
+      loc, padOp.getResultType(), padOp.getSource(), newMixedLow, newMixedHigh,
+      constantPadding, padOp.getNofold(), /*attrs=*/ArrayRef<NamedAttribute>{});
+
+  // Create a tensor::ExtractSliceOp.
+  // Reify the result sizes of the old tensor::PadOp.
+  ReifiedRankedShapedTypeDims reifiedSizes;
+  ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+      dyn_cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
+  if (failed(reifyShapedTypeInterface.reifyResultShapes(b, reifiedSizes)))
+    return failure();
+  SmallVector<OpFoldResult> offsets, sizes, strides;
+  for (int64_t i = 0, e = padOp.getResultType().getRank(); i < e; ++i) {
+    // offset = ub(low_padding) - low_padding
+    OpFoldResult prevLow = padOp.getMixedLowPad()[i];
+    if (prevLow.is<Attribute>()) {
+      offsets.push_back(b.getIndexAttr(0));
+    } else {
+      offsets.push_back(
+          b.create<affine::AffineApplyOp>(
+               loc, b.getAffineDimExpr(0) - b.getAffineDimExpr(1),
+               std::initializer_list<Value>{newMixedLow[i].get<Value>(),
+                                            prevLow.get<Value>()})
+              .getResult());
+    }
+    // size = reified result size
+    if (!padOp.getResultType().isDynamicDim(i)) {
+      sizes.push_back(b.getIndexAttr(padOp.getResultType().getDimSize(i)));
+    } else {
+      sizes.push_back(reifiedSizes[0][i]);
+    }
+    // stride = 1
+    strides.push_back(b.getIndexAttr(1));
+  }
+
+  return b.create<ExtractSliceOp>(loc, newPadOp, offsets, sizes, strides)
+      .getResult();
+}
+
+FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b,
+                                            tensor::EmptyOp emptyOp,
+                                            ValueRange independencies) {
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(emptyOp);
+  Location loc = emptyOp.getLoc();
+
+  SmallVector<OpFoldResult> newSizes;
+  for (OpFoldResult ofr : emptyOp.getMixedSizes()) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newSizes.push_back(*ub);
+  }
+
+  // Return existing tensor::EmptyOp if nothing has changed.
+  if (llvm::equal(emptyOp.getMixedSizes(), newSizes))
+    return emptyOp.getResult();
+
+  // Create a new tensor::EmptyOp.
+  Value newEmptyOp =
+      b.create<EmptyOp>(loc, newSizes, emptyOp.getType().getElementType());
+
+  // Create a tensor::ExtractSliceOp.
+  SmallVector<OpFoldResult> offsets(newSizes.size(), b.getIndexAttr(0));
+  SmallVector<OpFoldResult> strides(newSizes.size(), b.getIndexAttr(1));
+  return b
+      .create<ExtractSliceOp>(loc, newEmptyOp, offsets, emptyOp.getMixedSizes(),
+                              strides)
+      .getResult();
+}
diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
index 597bc7d..95fb785 100644 (file)
@@ -356,7 +356,7 @@ LogicalResult ValueBoundsConstraintSet::computeBound(
   return success();
 }
 
-LogicalResult ValueBoundsConstraintSet::computeBound(
+LogicalResult ValueBoundsConstraintSet::computeDependentBound(
     AffineMap &resultMap, ValueDimList &mapOperands, presburger::BoundType type,
     Value value, std::optional<int64_t> dim, ValueDimList dependencies,
     bool closedUB) {
@@ -368,6 +368,40 @@ LogicalResult ValueBoundsConstraintSet::computeBound(
       closedUB);
 }
 
+LogicalResult ValueBoundsConstraintSet::computeIndependentBound(
+    AffineMap &resultMap, ValueDimList &mapOperands, presburger::BoundType type,
+    Value value, std::optional<int64_t> dim, ValueRange independencies,
+    bool closedUB) {
+  // Return "true" if the given value is independent of all values in
+  // `independencies`. I.e., neither the value itself nor any value in the
+  // backward slice (reverse use-def chain) is contained in `independencies`.
+  auto isIndependent = [&](Value v) {
+    SmallVector<Value> worklist;
+    DenseSet<Value> visited;
+    worklist.push_back(v);
+    while (!worklist.empty()) {
+      Value next = worklist.pop_back_val();
+      if (visited.contains(next))
+        continue;
+      visited.insert(next);
+      if (llvm::is_contained(independencies, next))
+        return false;
+      // TODO: DominanceInfo could be used to stop the traversal early.
+      Operation *op = next.getDefiningOp();
+      if (!op)
+        continue;
+      worklist.append(op->getOperands().begin(), op->getOperands().end());
+    }
+    return true;
+  };
+
+  // Reify bounds in terms of any independent values.
+  return computeBound(
+      resultMap, mapOperands, type, value, dim,
+      [&](Value v, std::optional<int64_t> d) { return isIndependent(v); },
+      closedUB);
+}
+
 FailureOr<int64_t> ValueBoundsConstraintSet::computeConstantBound(
     presburger::BoundType type, Value value, std::optional<int64_t> dim,
     StopConditionFn stopCondition, bool closedUB) {
diff --git a/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
new file mode 100644 (file)
index 0000000..18a99c5
--- /dev/null
@@ -0,0 +1,151 @@
+// RUN: mlir-opt %s -allow-unregistered-dialect \
+// RUN:     -test-transform-dialect-interpreter -canonicalize \
+// RUN:     -split-input-file -verify-diagnostics | FileCheck %s
+
+// This is a test case where "high" padding depends on the IV.
+
+//       CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+//       CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK-LABEL: func @make_pad_loop_independent_1(
+//  CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+//  CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
+                                       %t: tensor<?xf32>, %f: f32) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[high:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
+    // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[5] high[%[[high]]]
+    // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+    // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][0] [%[[size]]] [1]
+    %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %p = tensor.pad %t low[5] high[%high] {
+    ^bb0(%arg1: index):
+      tensor.yield %f : f32
+    } : tensor<?xf32> to tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// This is a test case where "low" padding depends on the IV.
+
+//       CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+//       CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+//       CHECK: #[[$map2:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
+// CHECK-LABEL: func @make_pad_loop_independent_2(
+//  CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+//  CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_2(%lb: index, %ub: index, %step: index,
+                                       %t: tensor<?xf32>, %f: f32) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[low:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
+    // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[%[[low]]] high[5]
+    // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+    // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+    // CHECK: %[[offset:.*]] = affine.apply #[[$map2]](%[[iv]])[%[[lb]]]
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][%[[offset]]] [%[[size]]] [1]
+    %low = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %p = tensor.pad %t low[%low] high[5] {
+    ^bb0(%arg1: index):
+      tensor.yield %f : f32
+    } : tensor<?xf32> to tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+//       CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 * 2 - 2)>
+// CHECK-LABEL: func @two_loops(
+func.func @two_loops(%lb: index, %ub: index, %step: index,
+                     %t: tensor<?xf32>, %f: f32) {
+  scf.for %i = %lb to %ub step %step {
+    scf.for %j = %lb to %ub step %step {
+      // CHECK: affine.apply #[[$map]]()[%{{.*}}]
+      %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+      %p = tensor.pad %t low[%low] high[5] {
+      ^bb0(%arg1: index):
+        tensor.yield %f : f32
+      } : tensor<?xf32> to tensor<?xf32>
+      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+    }
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 2}
+}
+
+// -----
+
+func.func @not_enough_loops(%lb: index, %ub: index, %step: index,
+                            %t: tensor<?xf32>, %f: f32) {
+  scf.for %i = %lb to %ub step %step {
+    scf.for %j = %lb to %ub step %step {
+      %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+      // expected-note@below {{target op}}
+      %p = tensor.pad %t low[%low] high[5] {
+      ^bb0(%arg1: index):
+        tensor.yield %f : f32
+      } : tensor<?xf32> to tensor<?xf32>
+      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+    }
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  // expected-error@below {{could not find 2-th enclosing loop}}
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 3}
+}
+
+// -----
+
+// CHECK: #[[$map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
+// CHECK: #[[$map1:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+// CHECK-LABEL: func @make_empty_loop_independent(
+//  CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index)
+func.func @make_empty_loop_independent(%lb: index, %ub: index, %step: index) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[slice_sz:.*]] = affine.apply #[[$map]](%[[iv]])[%[[ub]]]
+    // CHECK: %[[empty_sz:.*]] = affine.apply #[[$map1]]()[%[[ub]], %[[lb]]]
+    // CHECK: %[[empty:.*]] = tensor.empty(%[[empty_sz]]) : tensor<?xf32>
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[empty]][0] [%[[slice_sz]]] [1]
+    %sz = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %empty = tensor.empty(%sz) : tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%empty) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.empty"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index c941fac..dfa8f43 100644 (file)
@@ -5808,6 +5808,7 @@ cc_library(
     includes = ["include"],
     deps = [
         ":AffineDialect",
+        ":AffineTransforms",
         ":AffineUtils",
         ":ArithDialect",
         ":ArithUtils",
@@ -5824,20 +5825,57 @@ cc_library(
         ":TensorPassIncGen",
         ":TilingInterface",
         ":Transforms",
+        ":ValueBoundsOpInterface",
         ":VectorDialect",
         "//llvm:Support",
     ],
 )
 
+td_library(
+    name = "TensorTransformOpsTdFiles",
+    srcs = [
+        "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+    ],
+    includes = ["include"],
+    deps = [
+        ":PDLDialect",
+        ":TransformDialectTdFiles",
+    ],
+)
+
+gentbl_cc_library(
+    name = "TensorTransformOpsIncGen",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-op-decls"],
+            "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc",
+        ),
+        (
+            ["-gen-op-defs"],
+            "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc",
+        ),
+    ],
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+    deps = [
+        ":TensorTransformOpsTdFiles",
+    ],
+)
+
 cc_library(
     name = "TensorTransformOps",
     srcs = glob(["lib/Dialect/Tensor/TransformOps/*.cpp"]),
     hdrs = glob(["include/mlir/Dialect/Tensor/TransformOps/*.h"]),
     includes = ["include"],
     deps = [
+        ":AffineDialect",
         ":IR",
         ":PDLDialect",
+        ":SCFDialect",
         ":TensorDialect",
+        ":TensorTransformOpsIncGen",
+        ":TensorTransforms",
         ":TransformDialect",
         "//llvm:Support",
     ],