[mlir][tensor] Remove folding of tensor.extract_slice during tiling

author Thomas Raoux <thomasraoux@google.com>

Thu, 25 Aug 2022 15:27:40 +0000 (15:27 +0000)

committer Thomas Raoux <thomasraoux@google.com>

Fri, 26 Aug 2022 14:30:39 +0000 (14:30 +0000)
author Thomas Raoux <thomasraoux@google.com>
Thu, 25 Aug 2022 15:27:40 +0000 (15:27 +0000)
committer Thomas Raoux <thomasraoux@google.com>
Fri, 26 Aug 2022 14:30:39 +0000 (14:30 +0000)
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h

index c3806e0..382e8ce 100644 (file)
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -93,25 +93,6 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap,
  /// (boundsMap = affine.map<() -> (42)>)
  FailureOr<int64_t> getConstantUpperBoundForIndex(Value value);
  
-/// Create an ExtractSliceOp and, if `source` is defined by an ExtractSliceOp,
-/// fold it by adding the offsets.
-///
-/// Example:
-/// ```
-/// %0 = tensor.extract_slice %arg0[3, 4][3, 32][1, 1] : tensor<64x64xf32> to
-///                                                        tensor<3x32xf32>
-/// %1 = tensor.extract_slice %0[0, 5][3, 4][1, 1] : tensor<3x32xf32> to
-///                                                    tensor<3x4xf32>
-/// ```
-/// folds into:
-/// ```
-/// %1 = tensor.extract_slice %arg0[3, 9][3, 4][1, 1] : tensor<64x64xf32> to
-///                                                       tensor<3x4xf32>
-/// ```
-tensor::ExtractSliceOp makeComposedExtractSliceOp(
-    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides);
-
  /// Create a tensor::PadOp that pads `source` to the size of the statically
  /// sized `type` whose static sizes are assumed to be greater than the dynamic
  /// `source` size. The padding introduces trailing `pad` values until the target
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp

index a9c44f0..9a170dd 100644 (file)
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -346,48 +346,6 @@ FailureOr<int64_t> getConstantUpperBoundForIndex(Value value) {
    return *std::min_element(constantBounds.begin(), constantBounds.end());
  }
  
-tensor::ExtractSliceOp makeComposedExtractSliceOp(
-    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides) {
-  assert(source && "expect source to be nonzero");
-
-  // Do not fold if the producer is not an ExtractSliceOp.
-  auto producerOp = source.getDefiningOp<tensor::ExtractSliceOp>();
-  if (!producerOp)
-    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
-                                            strides);
-
-  // Do not fold if the producer is rank reducing or if there are any non-unit
-  // strides. Supporting non-unit strides complicates the offset computation
-  // since the consumer offsets need to be multiplied by the producer strides.
-  // TODO: support non-unit strides once there are use cases.
-  SmallVector<OpFoldResult> allStrides = producerOp.getMixedStrides();
-  allStrides.append(strides.begin(), strides.end());
-  bool hasNonUnitStride = any_of(allStrides, [](OpFoldResult ofr) {
-    return getConstantIntValue(ofr) != static_cast<int64_t>(1);
-  });
-  if (hasNonUnitStride ||
-      producerOp.getSourceType().getRank() !=
-          producerOp.getResult().getType().cast<ShapedType>().getRank())
-    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
-                                            strides);
-
-  // Fold the producer by adding the offests and extracting the slice directly
-  // from the producer source tensor.
-  SmallVector<OpFoldResult> foldedOffsets(offsets.begin(), offsets.end());
-  AffineExpr dim1, dim2;
-  bindDims(b.getContext(), dim1, dim2);
-  for (const auto &en : enumerate(producerOp.getMixedOffsets())) {
-    SmallVector<Value> offsetValues = {
-        getValueOrCreateConstantIndexOp(b, loc, foldedOffsets[en.index()]),
-        getValueOrCreateConstantIndexOp(b, loc, en.value())};
-    foldedOffsets[en.index()] =
-        makeComposedAffineApply(b, loc, dim1 + dim2, offsetValues).getResult();
-  }
-  return b.create<tensor::ExtractSliceOp>(loc, producerOp.getSource(),
-                                          foldedOffsets, sizes, strides);
-}
-
  Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                              Value source, Value pad, bool nofold) {
    // Exit if `source` is not defined by an ExtractSliceOp.
@@ -777,8 +735,8 @@ static Value materializeTiledShape(OpBuilder &builder, Location loc,
                              sliceParams.sizes, sliceParams.strides);
                        })
                        .Case([&](RankedTensorType) {
-                        return makeComposedExtractSliceOp(
-                            builder, loc, valueToTile, sliceParams.offsets,
+                        return builder.create<tensor::ExtractSliceOp>(
+                            loc, valueToTile, sliceParams.offsets,
                              sliceParams.sizes, sliceParams.strides);
                        })
                        .Default([](ShapedType) -> Operation * {
diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir

index ecafcad..df4bcd4 100644 (file)
--- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
+++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
@@ -47,13 +47,16 @@ func.func @two_d(%arg0: tensor<10x34xf32>,
    // The canonicalizer is able to recover static shapes of for linalg.generic
    // instances, use those to differentiate the quadrants.
  
+  // CHECK:      %[[SLICE_1_IN:.+]] = tensor.extract_slice %[[IN]][0, 0] [4, 34] [1, 1]
    // CHECK:      %[[SLICE_1:.+]] = tensor.extract_slice %[[OUT]][0, 0] [4, 34] [1, 1]
    // CHECK:      scf.for %[[I1:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_1:.+]] = %[[SLICE_1]])
+  // CHECK:        %[[OUTSLICE_1_IN:.+]] = tensor.extract_slice %[[SLICE_1_IN]][%[[I1]], 0] [2, 34] [1, 1]
    // CHECK:        %[[OUTSLICE_1:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 34] [1, 1]
  
-  // CHECK:        %[[SLICE_2:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 16] [1, 1]
+  // CHECK:        %[[SLICE_2_IN:.+]] = tensor.extract_slice %[[OUTSLICE_1_IN]][0, 0] [2, 16] [1, 1]
+  // CHECK:        %[[SLICE_2:.+]] = tensor.extract_slice %[[OUTSLICE_1]][0, 0] [2, 16] [1, 1]
    // CHECK:        %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
-  // CHECK:          %[[INSLICE_2:.+]] = tensor.extract_slice %[[IN]][%[[I1]], %[[I2]]] [2, 8] [1, 1]
+  // CHECK:          %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1]
    // CHECK:          %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
    // CHECK:          %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
    // CHECK:          %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]
diff --git a/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir

index bf0e96d..9d06888 100644 (file)
--- a/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
+++ b/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
@@ -23,11 +23,13 @@ func.func @fill_matmul_tensors(
  //      CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
  //      CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
  //      CHECK:   %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
+//      CHECK:     %[[OUTSLICEA:.+]] = tensor.extract_slice %{{.*}}[%{{.*}}, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:     %[[OUTSLICEB:.+]] = tensor.extract_slice %{{.*}}[0, %{{.*}}] [%{{.*}}, %{{.*}}] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  //      CHECK:     %[[SLICE:.+]] = tensor.extract_slice %[[TC1]]
  //      CHECK:     %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[SLICE]]
  //      CHECK:     %[[sTD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[FILL]]) -> (tensor<?x?xf32>) {
-//      CHECK:       %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-//      CHECK:       %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:       %[[sTA:.*]] = tensor.extract_slice %[[OUTSLICEA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:       %[[sTB:.*]] = tensor.extract_slice %[[OUTSLICEB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
  //      CHECK:       %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
  //      CHECK:       %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
  // CHECK-SAME:                                  outs(%[[sTC]] : tensor<?x?xf32>)  -> tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir

index 7bf8e30..cfd68e5 100644 (file)
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -77,8 +77,6 @@ func.func @generic_op_tensors(
  // -----
  
  //  CHECK-DAG:  #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-//  CHECK-DAG:  #[[MAP1:.*]] = affine_map<(d0) -> (d0 + 3)>
-//  CHECK-DAG:  #[[MAP2:.*]] = affine_map<(d0) -> (d0 + 4)>
  
  //      CHECK:  fold_extract_slice
  // CHECK-SAME:    %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x128xf32>
@@ -93,15 +91,15 @@ func.func @fold_extract_slice(
    %0 = tensor.dim %arg1, %c0 : tensor<?x42xf32>
    %1 = tensor.extract_slice %arg0[3, 4] [%0, 42] [1, 1] : tensor<?x128xf32> to tensor<?x42xf32>
  
+  //      CHECK:   %[[E:.*]] = tensor.extract_slice %[[ARG0]][3, 4] [%[[DIM]], 42] [1, 1] : tensor<?x128xf32> to tensor<?x42xf32>
+
    //      CHECK:    scf.for %[[IV0:[0-9a-zA-Z]*]] =
    //      CHECK:      scf.for %[[IV1:[0-9a-zA-Z]*]] =
  
    // Fold the existing extract slice op into the one created by the tiling.
    //      CHECK:        %[[SIZE0:.*]] = affine.min #[[MAP0]](%[[IV0]])[%[[DIM]]
-  //      CHECK:        %[[OFF0:.*]] = affine.apply #[[MAP1]](%[[IV0]]
-  //      CHECK:        %[[OFF1:.*]] = affine.apply #[[MAP2]](%[[IV1]]
-  //      CHECK:        %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
-  // CHECK-SAME:                                          %[[OFF0]], %[[OFF1]]
+  //      CHECK:        %[[T0:.*]] = tensor.extract_slice %[[E]]
+  // CHECK-SAME:                                          %[[IV0]], %[[IV1]]
    // CHECK-SAME:                                          %[[SIZE0]], 3
    // CHECK-SAME:                                          1, 1
    //      CHECK:        {{.*}} = linalg.generic {{.*}} ins(%[[T0]]
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir

index 8c133d7..39227e6 100644 (file)
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -66,11 +66,12 @@ func.func @interchange_reduction(%input: tensor<12x7x25xf32>) -> tensor<12x25xf3
  //   CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index
  //       CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[FOR_ARG0:.+]] = %[[INIT]])
  //       CHECK:   scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
-//       CHECK:     %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
-//       CHECK:     %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE0]] : tensor<?x?xf32>)
+//       CHECK:     %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], 0, %[[IV1]]]
+//       CHECK:     %[[OUT_SLICE1:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
+//       CHECK:     %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE1]] : tensor<?x?xf32>)
  //       CHECK:     %[[C4:.+]] = arith.constant 4 : index
  //       CHECK:     scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
-//       CHECK:       %[[IN_SLICE:.+]] = tensor.extract_slice %[[INPUT]]
+//       CHECK:       %[[IN_SLICE:.+]] = tensor.extract_slice %[[OUT_SLICE0]]
  //       CHECK:       %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]
  //       CHECK:       linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor<?x?x?xf32>) outs(%[[OUT_SLICE2]] : tensor<?x?xf32>)
  
diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir

index ebf3b25..8ae34bd 100644 (file)
--- a/mlir/test/Dialect/Linalg/transform-op-split.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir
@@ -153,14 +153,14 @@ func.func @two_d(%arg0: tensor<10x34xf32>,
    // CHECK:      %[[OUT_2:.+]] = tensor.extract_slice %[[PARTIAL_1]]
    // Note that `extract_slice` taking a slice from another `extract_slice` result
    // is folded to use the operand of the first `extract_slice`.
-  // CHECK:      %[[IN_21:.+]] = tensor.extract_slice %[[IN]]
-  // CHECK:      %[[OUT_21:.+]] = tensor.extract_slice %[[PARTIAL_1]]
+  // CHECK:      %[[IN_21:.+]] = tensor.extract_slice %[[IN_2]]
+  // CHECK:      %[[OUT_21:.+]] = tensor.extract_slice %[[OUT_2]]
    // CHECK:      %[[RES_21:.+]] = linalg.generic
    // CHECK-SAME:   ins(%[[IN_21]] : tensor<6x16xf32>)
    // CHECK-SAME:   outs(%[[OUT_21]] : tensor<6x16xf32>)
    // CHECK:      %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]]
    //
-  // CHECK:      %[[IN_22:.+]] = tensor.extract_slice %[[IN]]
+  // CHECK:      %[[IN_22:.+]] = tensor.extract_slice %[[IN_2]]
    // CHECK:      %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]]
    // CHECK:      %[[RES_22:.+]] = linalg.generic
    // CHECK-SAME:   ins(%[[IN_22]] : tensor<6x18xf32>)
author	Thomas Raoux <thomasraoux@google.com>
	Thu, 25 Aug 2022 15:27:40 +0000 (15:27 +0000)
committer	Thomas Raoux <thomasraoux@google.com>
	Fri, 26 Aug 2022 14:30:39 +0000 (14:30 +0000)
mlir/include/mlir/Dialect/Linalg/Utils/Utils.h		patch \| blob \| history
mlir/lib/Dialect/Linalg/Utils/Utils.cpp		patch \| blob \| history
mlir/test/Dialect/Linalg/multisize-tiling-full.mlir		patch \| blob \| history
mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir		patch \| blob \| history
mlir/test/Dialect/Linalg/tile-tensors.mlir		patch \| blob \| history
mlir/test/Dialect/Linalg/transform-op-fuse.mlir		patch \| blob \| history
mlir/test/Dialect/Linalg/transform-op-split.mlir		patch \| blob \| history