From: Thomas Raoux
Date: Thu, 25 Aug 2022 15:27:40 +0000 (+0000)
Subject: [mlir][tensor] Remove folding of tensor.extract_slice during tiling
X-Git-Tag: upstream/17.0.6~35279
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2e34599bfd01e5b20e09bd6af590a52d6a63a64c;p=platform%2Fupstream%2Fllvm.git

[mlir][tensor] Remove folding of tensor.extract_slice during tiling

Blindly folding tensor.extract_slice makes the bufferization transformation
harder. This kind of transformation should happen separately, if needed,
rather than being done within makeTiledShape, which is called during tiling.
Also remove makeComposedExtractSliceOp, as it is not tested outside of this
code.

Differential Revision: https://reviews.llvm.org/D132666
---
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index c3806e0..382e8ce 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -93,25 +93,6 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap,
 /// (boundsMap = affine.map<() -> (42)>)
 FailureOr<int64_t> getConstantUpperBoundForIndex(Value value);
 
-/// Create an ExtractSliceOp and, if `source` is defined by an ExtractSliceOp,
-/// fold it by adding the offsets.
-///
-/// Example:
-/// ```
-/// %0 = tensor.extract_slice %arg0[3, 4][3, 32][1, 1] : tensor<64x64xf32> to
-/// tensor<3x32xf32>
-/// %1 = tensor.extract_slice %0[0, 5][3, 4][1, 1] : tensor<3x32xf32> to
-/// tensor<3x4xf32>
-/// ```
-/// folds into:
-/// ```
-/// %1 = tensor.extract_slice %arg0[3, 9][3, 4][1, 1] : tensor<64x64xf32> to
-/// tensor<3x4xf32>
-/// ```
-tensor::ExtractSliceOp makeComposedExtractSliceOp(
-    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides);
-
 /// Create a tensor::PadOp that pads `source` to the size of the statically
 /// sized `type` whose static sizes are assumed to be greater than the dynamic
 /// `source` size. The padding introduces trailing `pad` values until the target
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index a9c44f0..9a170dd 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -346,48 +346,6 @@ FailureOr<int64_t> getConstantUpperBoundForIndex(Value value) {
   return *std::min_element(constantBounds.begin(), constantBounds.end());
 }
 
-tensor::ExtractSliceOp makeComposedExtractSliceOp(
-    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides) {
-  assert(source && "expect source to be nonzero");
-
-  // Do not fold if the producer is not an ExtractSliceOp.
-  auto producerOp = source.getDefiningOp<tensor::ExtractSliceOp>();
-  if (!producerOp)
-    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
-                                            strides);
-
-  // Do not fold if the producer is rank reducing or if there are any non-unit
-  // strides. Supporting non-unit strides complicates the offset computation
-  // since the consumer offsets need to be multiplied by the producer strides.
-  // TODO: support non-unit strides once there are use cases.
-  SmallVector<OpFoldResult> allStrides = producerOp.getMixedStrides();
-  allStrides.append(strides.begin(), strides.end());
-  bool hasNonUnitStride = any_of(allStrides, [](OpFoldResult ofr) {
-    return getConstantIntValue(ofr) != static_cast<int64_t>(1);
-  });
-  if (hasNonUnitStride ||
-      producerOp.getSourceType().getRank() !=
-          producerOp.getResult().getType().cast<ShapedType>().getRank())
-    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
-                                            strides);
-
-  // Fold the producer by adding the offsets and extracting the slice directly
-  // from the producer source tensor.
-  SmallVector<OpFoldResult> foldedOffsets(offsets.begin(), offsets.end());
-  AffineExpr dim1, dim2;
-  bindDims(b.getContext(), dim1, dim2);
-  for (const auto &en : enumerate(producerOp.getMixedOffsets())) {
-    SmallVector<Value> offsetValues = {
-        getValueOrCreateConstantIndexOp(b, loc, foldedOffsets[en.index()]),
-        getValueOrCreateConstantIndexOp(b, loc, en.value())};
-    foldedOffsets[en.index()] =
-        makeComposedAffineApply(b, loc, dim1 + dim2, offsetValues).getResult();
-  }
-  return b.create<tensor::ExtractSliceOp>(loc, producerOp.getSource(),
-                                          foldedOffsets, sizes, strides);
-}
-
 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                             Value source, Value pad, bool nofold) {
   // Exit if `source` is not defined by an ExtractSliceOp.
@@ -777,8 +735,8 @@ static Value materializeTiledShape(OpBuilder &builder, Location loc,
                                sliceParams.sizes, sliceParams.strides);
         })
         .Case([&](RankedTensorType) {
-          return makeComposedExtractSliceOp(
-              builder, loc, valueToTile, sliceParams.offsets,
+          return builder.create<tensor::ExtractSliceOp>(
+              loc, valueToTile, sliceParams.offsets,
               sliceParams.sizes, sliceParams.strides);
         })
         .Default([](ShapedType) -> Operation * {
diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
index ecafcad..df4bcd4 100644
--- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
+++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
@@ -47,13 +47,16 @@ func.func @two_d(%arg0: tensor<10x34xf32>,
   // The canonicalizer is able to recover static shapes of the linalg.generic
   // instances; use those to differentiate the quadrants.
+  // CHECK: %[[SLICE_1_IN:.+]] = tensor.extract_slice %[[IN]][0, 0] [4, 34] [1, 1]
   // CHECK: %[[SLICE_1:.+]] = tensor.extract_slice %[[OUT]][0, 0] [4, 34] [1, 1]
   // CHECK: scf.for %[[I1:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_1:.+]] = %[[SLICE_1]])
+  // CHECK:   %[[OUTSLICE_1_IN:.+]] = tensor.extract_slice %[[SLICE_1_IN]][%[[I1]], 0] [2, 34] [1, 1]
   // CHECK:   %[[OUTSLICE_1:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 34] [1, 1]
-  // CHECK:   %[[SLICE_2:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 16] [1, 1]
+  // CHECK:   %[[SLICE_2_IN:.+]] = tensor.extract_slice %[[OUTSLICE_1_IN]][0, 0] [2, 16] [1, 1]
+  // CHECK:   %[[SLICE_2:.+]] = tensor.extract_slice %[[OUTSLICE_1]][0, 0] [2, 16] [1, 1]
   // CHECK:   %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
-  // CHECK:     %[[INSLICE_2:.+]] = tensor.extract_slice %[[IN]][%[[I1]], %[[I2]]] [2, 8] [1, 1]
+  // CHECK:     %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1]
   // CHECK:     %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
   // CHECK:     %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
   // CHECK:     %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]
diff --git a/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
index bf0e96d..9d06888 100644
--- a/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
+++ b/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
@@ -23,11 +23,13 @@ func.func @fill_matmul_tensors(
 // CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
 // CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
 // CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
+// CHECK:   %[[OUTSLICEA:.+]] = tensor.extract_slice %{{.*}}[%{{.*}}, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK:   %[[OUTSLICEB:.+]] = tensor.extract_slice %{{.*}}[0, %{{.*}}] [%{{.*}}, %{{.*}}] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
 // CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[TC1]]
 // CHECK:   %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[SLICE]]
 // CHECK:   %[[sTD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[FILL]]) -> (tensor<?x?xf32>) {
-// CHECK:     %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-// CHECK:     %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK:     %[[sTA:.*]] = tensor.extract_slice %[[OUTSLICEA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK:     %[[sTB:.*]] = tensor.extract_slice %[[OUTSLICEB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
 // CHECK:     %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
 // CHECK:     %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
 // CHECK-SAME:   outs(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
index 7bf8e30..cfd68e5 100644
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -77,8 +77,6 @@ func.func @generic_op_tensors(
 // -----
 
 // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (d0 + 3)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0) -> (d0 + 4)>
 
 // CHECK: fold_extract_slice
 // CHECK-SAME:   %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x128xf32>
@@ -93,15 +91,15 @@ func.func @fold_extract_slice(
   %0 = tensor.dim %arg1, %c0 : tensor<?x42xf32>
   %1 = tensor.extract_slice %arg0[3, 4] [%0, 42] [1, 1] : tensor<?x128xf32> to tensor<?x42xf32>
 
+  // CHECK: %[[E:.*]] = tensor.extract_slice %[[ARG0]][3, 4] [%[[DIM]], 42] [1, 1] : tensor<?x128xf32> to tensor<?x42xf32>
+
   // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
   // CHECK:   scf.for %[[IV1:[0-9a-zA-Z]*]] =
   // Fold the existing extract slice op into the one created by the tiling.
   // CHECK:     %[[SIZE0:.*]] = affine.min #[[MAP0]](%[[IV0]])[%[[DIM]]
-  // CHECK:     %[[OFF0:.*]] = affine.apply #[[MAP1]](%[[IV0]]
-  // CHECK:     %[[OFF1:.*]] = affine.apply #[[MAP2]](%[[IV1]]
-  // CHECK:     %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
-  // CHECK-SAME:   %[[OFF0]], %[[OFF1]]
+  // CHECK:     %[[T0:.*]] = tensor.extract_slice %[[E]]
+  // CHECK-SAME:   %[[IV0]], %[[IV1]]
   // CHECK-SAME:   %[[SIZE0]], 3
   // CHECK-SAME:   1, 1
   // CHECK: {{.*}} = linalg.generic {{.*}} ins(%[[T0]]
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
index 8c133d7..39227e6 100644
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -66,11 +66,12 @@ func.func @interchange_reduction(%input: tensor<12x7x25xf32>) -> tensor<12x25xf32> {
 // CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index
 // CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[FOR_ARG0:.+]] = %[[INIT]])
 // CHECK:   scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
-// CHECK:     %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
-// CHECK:     %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE0]] : tensor<?x?xf32>)
+// CHECK:     %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], 0, %[[IV1]]]
+// CHECK:     %[[OUT_SLICE1:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
+// CHECK:     %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE1]] : tensor<?x?xf32>)
 // CHECK:     %[[C4:.+]] = arith.constant 4 : index
 // CHECK:     scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
-// CHECK:       %[[IN_SLICE:.+]] = tensor.extract_slice %[[INPUT]]
+// CHECK:       %[[IN_SLICE:.+]] = tensor.extract_slice %[[OUT_SLICE0]]
 // CHECK:       %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]
 // CHECK:       linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor<?x?x?xf32>) outs(%[[OUT_SLICE2]] : tensor<?x?xf32>)
diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir
index ebf3b25..8ae34bd 100644
--- a/mlir/test/Dialect/Linalg/transform-op-split.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir
@@ -153,14 +153,14 @@ func.func @two_d(%arg0: tensor<10x34xf32>,
   // CHECK: %[[OUT_2:.+]] = tensor.extract_slice %[[PARTIAL_1]]
   // Note that `extract_slice` taking a slice from another `extract_slice` result
   // is folded to use the operand of the first `extract_slice`.
-  // CHECK: %[[IN_21:.+]] = tensor.extract_slice %[[IN]]
-  // CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[PARTIAL_1]]
+  // CHECK: %[[IN_21:.+]] = tensor.extract_slice %[[IN_2]]
+  // CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[OUT_2]]
   // CHECK: %[[RES_21:.+]] = linalg.generic
   // CHECK-SAME:   ins(%[[IN_21]] : tensor<6x16xf32>)
   // CHECK-SAME:   outs(%[[OUT_21]] : tensor<6x16xf32>)
   // CHECK: %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]]
   //
-  // CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN]]
+  // CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN_2]]
   // CHECK: %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]]
   // CHECK: %[[RES_22:.+]] = linalg.generic
   // CHECK-SAME:   ins(%[[IN_22]] : tensor<6x18xf32>)
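
For context, the effect of this change on tiled IR can be illustrated with the
example from the removed makeComposedExtractSliceOp documentation. This is a
hand-adapted sketch based on that doc comment, not output captured from the
test suite:

```
// After this change, tiling a value that is itself produced by an
// extract_slice keeps the two slice ops chained:
%0 = tensor.extract_slice %arg0[3, 4] [3, 32] [1, 1]
    : tensor<64x64xf32> to tensor<3x32xf32>
%1 = tensor.extract_slice %0[0, 5] [3, 4] [1, 1]
    : tensor<3x32xf32> to tensor<3x4xf32>

// Previously, tiling eagerly composed the offsets (3 + 0, 4 + 5) and sliced
// the original source directly. A client that wants this form can still
// apply the same rewrite as a separate step after tiling:
%1 = tensor.extract_slice %arg0[3, 9] [3, 4] [1, 1]
    : tensor<64x64xf32> to tensor<3x4xf32>
```

Keeping the chained form preserves the correspondence between each tiled
operand and its producer slice, which, per the rationale above, makes the
subsequent bufferization transformation easier to apply.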