From 5c258d7adcd1890b83146dd18d214414c808f53d Mon Sep 17 00:00:00 2001
From: Matthias Springer
Date: Fri, 30 Jun 2023 12:44:48 +0200
Subject: [PATCH] [mlir][tensor] More static padding sizes for PadOp

Improve `tensor::createPadHighOp`: emit static padding sizes whenever
possible, instead of materializing them as `arith.constant` ops.
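To illustrate (IR abbreviated from the updated test expectations in
generalize-tensor-pack.mlir; the SSA names %src and %pad_val are
placeholders): padding a tensor<5x1xf32> up to tensor<8x2xf32>
previously went through materialized index constants:

  // Before: padding sizes are SSA values defined by constant ops.
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c3 = arith.constant 3 : index
  %padded = tensor.pad %src low[%c0, %c0] high[%c3, %c1] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad_val : f32
  } : tensor<5x1xf32> to tensor<8x2xf32>

With this change, the sizes are attached to the op as static values:

  // After: padding sizes are inline static attributes.
  %padded = tensor.pad %src low[0, 0] high[3, 1] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad_val : f32
  } : tensor<5x1xf32> to tensor<8x2xf32>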
Differential Revision: https://reviews.llvm.org/D154200
---
 mlir/lib/Dialect/Tensor/Utils/Utils.cpp              |  8 +++-----
 mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir |  5 +----
 mlir/test/Dialect/Linalg/transform-op-pad.mlir       | 12 ++----------
 3 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
index 3ea81d4..c814c08 100644
--- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
@@ -24,9 +24,8 @@ using namespace mlir::tensor;
 PadOp mlir::tensor::createPadHighOp(RankedTensorType type, Value source,
                                     Value pad, bool nofold, Location loc,
                                     OpBuilder &b) {
-  auto zero = b.createOrFold<arith::ConstantIndexOp>(loc, 0);
-  SmallVector<OpFoldResult> low(type.getRank(), zero);
-  SmallVector<OpFoldResult> high(type.getRank(), zero);
+  SmallVector<OpFoldResult> low(type.getRank(), b.getIndexAttr(0));
+  SmallVector<OpFoldResult> high(type.getRank(), b.getIndexAttr(0));
   for (const auto &en : enumerate(type.getShape())) {
     // Pad only the static dimensions of the result tensor type.
     if (ShapedType::isDynamic(en.value()))
       continue;
@@ -36,8 +35,7 @@ PadOp mlir::tensor::createPadHighOp(RankedTensorType type, Value source,
     bindDims(b.getContext(), d0);
     OpFoldResult sz = tensor::getMixedSize(b, loc, source, en.index());
     high[en.index()] =
-        affine::makeComposedAffineApply(b, loc, en.value() - d0, {sz})
-            .getResult();
+        affine::makeComposedFoldedAffineApply(b, loc, en.value() - d0, {sz});
   }
   return b.create<PadOp>(loc, type, source, low, high, pad, nofold);
 }
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
index 283cb43..eaad6bd 100644
--- a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
@@ -27,10 +27,7 @@ func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2x
 // CHECK-SAME:    %[[SRC:[a-zA-Z0-9]+]]
 // CHECK-SAME:    %[[DEST:[a-zA-Z0-9]+]]
 // CHECK-SAME:    %[[PAD_VAL:[a-zA-Z0-9]+]]
-// CHECK-DAG:     %[[C0:.+]] = arith.constant 0 : index
-// CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
-// CHECK-DAG:     %[[C3:.+]] = arith.constant 3 : index
-// CHECK:         %[[PAD:.+]] = tensor.pad %[[SRC]] low[%[[C0]], %[[C0]]] high[%[[C3]], %[[C1]]]
+// CHECK:         %[[PAD:.+]] = tensor.pad %[[SRC]] low[0, 0] high[3, 1]
 // CHECK:         tensor.yield %[[PAD_VAL]]
 // CHECK:         %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
 // CHECK:         %[[TRANSP:.+]] = linalg.transpose
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
index b46bcc1..e6085d0 100644
--- a/mlir/test/Dialect/Linalg/transform-op-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
@@ -17,7 +17,6 @@ func.func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
   %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

   //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.
-  //  CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index

   //      CHECK: %[[T3:.*]] = tensor.pad %[[T0]] nofold
   //      CHECK: tensor.yield %[[CST]]
@@ -92,7 +91,6 @@ func.func @static_sizes_output_divisible_on_empty_op(%arg0: tensor<24x12xf32>,
   %3 = tensor.empty() : tensor<4x5xf32>

   //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.
-  //  CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index

   //      CHECK: %[[T3:.*]] = tensor.pad %[[T0]] nofold
   //      CHECK: tensor.yield %[[CST]]
@@ -188,7 +186,6 @@ transform.sequence failures(suppress) {
 // linalg op is not produced by an empty op or an extract_slice op.

 // CHECK-DAG: #[[$MAP_MIN:.*]] = affine_map<(d0) -> (-d0 + 2044, 16)>
-// CHECK-DAG: #[[$MAP_C0:.*]] = affine_map<() -> (0)>
 // CHECK-DAG: #[[$MAP_TO_16:.*]] = affine_map<(d0) -> (-d0 + 16)>
 // CHECK-LABEL: @outs_not_produced_by_empty_or_extract_slice(
 // CHECK-SAME: %[[A:[^: ]*]]: tensor<128x2044xf32>,
@@ -210,18 +207,14 @@ func.func @outs_not_produced_by_empty_or_extract_slice(%a : tensor<128x2044xf32>
   %extracted_slice_2 = tensor.extract_slice %a[0, %arg3] [128, %11] [1, 1] : tensor<128x2044xf32> to tensor<128x?xf32>
   %extracted_slice_3 = tensor.extract_slice %b[%arg3, 0] [%11, 128] [1, 1] : tensor<2044x128xf32> to tensor<?x128xf32>
   //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.
-  //  CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-  //  CHECK-DAG: %[[ZERO:.*]] = affine.apply #[[$MAP_C0]]()
   //  CHECK-DAG: %[[TO_16:.*]] = affine.apply #[[$MAP_TO_16]](%[[MIN]])
-  //      CHECK: %[[PADDED_A_SLICE:.*]] = tensor.pad %[[A_SLICE]] nofold low[%[[C0]], %[[C0]]] high[%[[ZERO]], %[[TO_16]]]
+  //      CHECK: %[[PADDED_A_SLICE:.*]] = tensor.pad %[[A_SLICE]] nofold low[0, 0] high[0, %[[TO_16]]]
   //      CHECK: tensor.yield %[[CST]]

   //      CHECK: %[[PADDED_B_SLICE:.*]] = tensor.pad %[[B_SLICE]] nofold
   // The output shape is already padded, so actually we shouldn't
   // add anything to the upper bound.
-  //      CHECK: %[[ZERO0:.*]] = affine.apply #[[$MAP_C0]]()
-  //      CHECK: %[[ZERO1:.*]] = affine.apply #[[$MAP_C0]]()
-  //      CHECK: %[[PADDED_ARG4:.*]] = tensor.pad %[[ARG4]] nofold low[{{.*}}] high[%[[ZERO0]], %[[ZERO1]]]
+  //      CHECK: %[[PADDED_ARG4:.*]] = tensor.pad %[[ARG4]] nofold low[{{.*}}] high[0, 0]

   //      CHECK: %[[T5:.*]] = linalg.matmul
   // CHECK-SAME:   ins(%[[PADDED_A_SLICE]], %[[PADDED_B_SLICE]] : tensor<128x16xf32>, tensor<16x128xf32>)
@@ -261,7 +254,6 @@ func.func @pack_everything(%arg0: tensor<24x12xf32>,
   %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

   //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.
-  //  CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index

   //      CHECK: %[[PAD0:.*]] = tensor.pad %[[T0]] nofold
   //      CHECK: %[[PAD1:.*]] = tensor.pad %[[T1]] nofold
-- 
2.7.4