From 5c258d7adcd1890b83146dd18d214414c808f53d Mon Sep 17 00:00:00 2001
From: Matthias Springer
Date: Fri, 30 Jun 2023 12:44:48 +0200
Subject: [PATCH] [mlir][tensor] More static padding sizes for PadOp

Improve `tensor::createPadHighOp`: emit static padding sizes whenever
possible, instead of materializing them as `arith.constant` ops.
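To illustrate (IR abbreviated from the updated test expectations in
generalize-tensor-pack.mlir; the SSA names %src and %pad_val are
placeholders): padding a tensor<5x1xf32> up to tensor<8x2xf32>
previously went through materialized index constants:

  // Before: padding sizes are SSA values defined by constant ops.
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c3 = arith.constant 3 : index
  %padded = tensor.pad %src low[%c0, %c0] high[%c3, %c1] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad_val : f32
  } : tensor<5x1xf32> to tensor<8x2xf32>

With this change, the sizes are attached to the op as static values:

  // After: padding sizes are inline static attributes.
  %padded = tensor.pad %src low[0, 0] high[3, 1] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad_val : f32
  } : tensor<5x1xf32> to tensor<8x2xf32>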
Differential Revision: https://reviews.llvm.org/D154200
---
 mlir/lib/Dialect/Tensor/Utils/Utils.cpp              |  8 +++-----
 mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir |  5 +----
 mlir/test/Dialect/Linalg/transform-op-pad.mlir       | 12 ++----------
 3 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
index 3ea81d4..c814c08 100644
--- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
@@ -24,9 +24,8 @@ using namespace mlir::tensor;
 PadOp mlir::tensor::createPadHighOp(RankedTensorType type, Value source,
                                     Value pad, bool nofold, Location loc,
                                     OpBuilder &b) {
-  auto zero = b.createOrFold<arith::ConstantIndexOp>(loc, 0);
-  SmallVector<OpFoldResult> low(type.getRank(), zero);
-  SmallVector<OpFoldResult> high(type.getRank(), zero);
+  SmallVector<OpFoldResult> low(type.getRank(), b.getIndexAttr(0));
+  SmallVector<OpFoldResult> high(type.getRank(), b.getIndexAttr(0));
   for (const auto &en : enumerate(type.getShape())) {
     // Pad only the static dimensions of the result tensor type.
     if (ShapedType::isDynamic(en.value()))
       continue;
@@ -36,8 +35,7 @@ PadOp mlir::tensor::createPadHighOp(RankedTensorType type, Value source,
     bindDims(b.getContext(), d0);
     OpFoldResult sz = tensor::getMixedSize(b, loc, source, en.index());
     high[en.index()] =
-        affine::makeComposedAffineApply(b, loc, en.value() - d0, {sz})
-            .getResult();
+        affine::makeComposedFoldedAffineApply(b, loc, en.value() - d0, {sz});
   }
   return b.create<PadOp>(loc, type, source, low, high, pad, nofold);
 }
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
index 283cb43..eaad6bd 100644
--- a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
@@ -27,10 +27,7 @@ func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2x
 // CHECK-SAME:    %[[SRC:[a-zA-Z0-9]+]]
 // CHECK-SAME:    %[[DEST:[a-zA-Z0-9]+]]
 // CHECK-SAME:    %[[PAD_VAL:[a-zA-Z0-9]+]]
-// CHECK-DAG:     %[[C0:.+]] = arith.constant 0 : index
-// CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
-// CHECK-DAG:     %[[C3:.+]] = arith.constant 3 : index
-// CHECK:         %[[PAD:.+]] = tensor.pad %[[SRC]] low[%[[C0]], %[[C0]]] high[%[[C3]], %[[C1]]]
+// CHECK:         %[[PAD:.+]] = tensor.pad %[[SRC]] low[0, 0] high[3, 1]
 // CHECK:         tensor.yield %[[PAD_VAL]]
 // CHECK:         %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
 // CHECK:         %[[TRANSP:.+]] = linalg.transpose
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
index b46bcc1..e6085d0 100644
--- a/mlir/test/Dialect/Linalg/transform-op-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
@@ -17,7 +17,6 @@ func.func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
   %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

   //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.
-  //  CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index

   //      CHECK: %[[T3:.*]] = tensor.pad %[[T0]] nofold
   //      CHECK: tensor.yield %[[CST]]
@@ -92,7 +91,6 @@ func.func @static_sizes_output_divisible_on_empty_op(%arg0: tensor<24x12xf32>,
   %3 = tensor.empty() : tensor<4x5xf32>

   //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.
-  //  CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index

   //      CHECK: %[[T3:.*]] = tensor.pad %[[T0]] nofold
   //      CHECK: tensor.yield %[[CST]]
@@ -188,7 +186,6 @@ transform.sequence failures(suppress) {
 // linalg op is not produced by an empty op or an extract_slice op.

 // CHECK-DAG: #[[$MAP_MIN:.*]] = affine_map<(d0) -> (-d0 + 2044, 16)>
-// CHECK-DAG: #[[$MAP_C0:.*]] = affine_map<() -> (0)>
 // CHECK-DAG: #[[$MAP_TO_16:.*]] = affine_map<(d0) -> (-d0 + 16)>
 // CHECK-LABEL: @outs_not_produced_by_empty_or_extract_slice(
 // CHECK-SAME: %[[A:[^: ]*]]: tensor<128x2044xf32>,
@@ -210,18 +207,14 @@ func.func @outs_not_produced_by_empty_or_extract_slice(%a : tensor<128x2044xf32>
   %extracted_slice_2 = tensor.extract_slice %a[0, %arg3] [128, %11] [1, 1] : tensor<128x2044xf32> to tensor<128x?xf32>
   %extracted_slice_3 = tensor.extract_slice %b[%arg3, 0] [%11, 128] [1, 1] : tensor<2044x128xf32> to tensor<?x128xf32>
   //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.
-  //  CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-  //  CHECK-DAG: %[[ZERO:.*]] = affine.apply #[[$MAP_C0]]()
   //  CHECK-DAG: %[[TO_16:.*]] = affine.apply #[[$MAP_TO_16]](%[[MIN]])
-  //      CHECK: %[[PADDED_A_SLICE:.*]] = tensor.pad %[[A_SLICE]] nofold low[%[[C0]], %[[C0]]] high[%[[ZERO]], %[[TO_16]]]
+  //      CHECK: %[[PADDED_A_SLICE:.*]] = tensor.pad %[[A_SLICE]] nofold low[0, 0] high[0, %[[TO_16]]]
   //      CHECK: tensor.yield %[[CST]]

   //      CHECK: %[[PADDED_B_SLICE:.*]] = tensor.pad %[[B_SLICE]] nofold
   // The output shape is already padded, so actually we shouldn't
   // add anything to the upper bound.
-  //      CHECK: %[[ZERO0:.*]] = affine.apply #[[$MAP_C0]]()
-  //      CHECK: %[[ZERO1:.*]] = affine.apply #[[$MAP_C0]]()
-  //      CHECK: %[[PADDED_ARG4:.*]] = tensor.pad %[[ARG4]] nofold low[{{.*}}] high[%[[ZERO0]], %[[ZERO1]]]
+  //      CHECK: %[[PADDED_ARG4:.*]] = tensor.pad %[[ARG4]] nofold low[{{.*}}] high[0, 0]

   //      CHECK: %[[T5:.*]] = linalg.matmul
   // CHECK-SAME:   ins(%[[PADDED_A_SLICE]], %[[PADDED_B_SLICE]] : tensor<128x16xf32>, tensor<16x128xf32>)
@@ -261,7 +254,6 @@ func.func @pack_everything(%arg0: tensor<24x12xf32>,
   %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

   //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.
-  //  CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index

   //      CHECK: %[[PAD0:.*]] = tensor.pad %[[T0]] nofold
   //      CHECK: %[[PAD1:.*]] = tensor.pad %[[T1]] nofold
-- 
2.7.4