From 225b960cfcc6091e0d51671f446cce7e00d41756 Mon Sep 17 00:00:00 2001
From: Matthias Springer
Date: Mon, 21 Jun 2021 16:29:42 +0900
Subject: [PATCH] [mlir][linalg] Support low padding in subtensor(pad_tensor) lowering

Differential Revision: https://reviews.llvm.org/D104591
---
 mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp | 59 ++++++++++-------
 .../Dialect/Linalg/subtensor-of-padtensor.mlir    | 75 ++++++++++++++++++++++
 2 files changed, 111 insertions(+), 23 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 4c2df05..45c91c0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -731,9 +731,6 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
   Value padValue = padOp.getConstantPaddingValue();
   if (!padValue)
     return failure();
-  // Only zero low padding supported at the moment.
-  if (!padOp.hasZeroLowPad())
-    return failure();
 
   // Helper variables and functions for various arithmetic operations. These are
   // used extensively for computing new offset/length and padding values.
@@ -788,33 +785,53 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
 
   int64_t rank = padOp.getSourceType().getRank();
   for (unsigned dim = 0; dim < rank; ++dim) {
+    auto low = asValue(rewriter, loc, padOp.getMixedLowPad()[dim]);
     auto offset = asValue(rewriter, loc, subTensorOp.getMixedOffsets()[dim]);
     auto length = asValue(rewriter, loc, subTensorOp.getMixedSizes()[dim]);
     auto srcSize = rewriter.createOrFold<memref::DimOp>(
         loc, padOp.source(), dim);
 
-    // Existing low padding is zero, so new low padding is also zero.
-    Value newLow = zero;
+    // The new amount of low padding is `low - offset`, except for the case
+    // where none of the low padding is read. In that case, the new amount of
+    // low padding is zero.
+    Value newLow = max(zero, sub(low, offset));
     appendIndex(newLow, newLows, staticNewLows);
 
-    // There is no low padding, so the offset remains unchanged. Except for the
-    // case where the SubTensorOp starts reading from a position within the high
-    // padding. In that case, set the offset to the end of source tensor. The
-    // new SubTensorOp length will be zero in that case. (Effectively reading no
+    // Start reading the data from position `offset - low`. Since the original
+    // read may have started in the low padding zone, this value could be
+    // negative. Therefore, start reading from:
+    //
+    //   max(offset - low, 0)
+    //
+    // The original read could also have started in the high padding zone.
+    // In that case, set the offset to the end of the source tensor. The new
+    // SubTensorOp length will be zero in that case. (Effectively reading no
     // data from the source.)
-    Value newOffset = min(offset, srcSize);
+    Value newOffset = min(max(sub(offset, low), zero), srcSize);
     newOffsets.push_back(asOpFoldResult(rewriter, newOffset));
 
-    // The new SubTensorOp starts reading at `newOffset` and reads until
-    // `offset + length`. This position may be outside of the source (i.e.,
-    // within the high padding). In that case, read only until the end of the
-    // source. In mathematical terms:
+    // The original SubTensorOp was reading until position `offset + length`.
+    // Therefore, the corresponding position within the source tensor is:
+    //
+    //   offset + length - low
     //
-    //   endLoc = min(offset + length, srcSize)
+    // In case the original SubTensorOp stopped reading within the low padding
+    // zone, this value can be negative. In that case, the end position of
+    // the read should be zero. (Similar to newOffset.)
+    //
+    // The original read could also have stopped in the high padding zone. In
+    // that case, the end position of the read should be the end of the
+    // source tensor. (Similar to newOffset.)
+    //
+    //   endLoc = min(max(offset - low + length, 0), srcSize)
     //
     // The new SubTensorOp length is `endLoc - newOffset`.
-    Value newLength = sub(min(add(offset, length), srcSize), newOffset);
+    Value endLoc = min(max(add(sub(offset, low), length), zero), srcSize);
+    Value newLength = sub(endLoc, newOffset);
     newLengths.push_back(asOpFoldResult(rewriter, newLength));
+
+    // Check if newLength is zero. In that case, no SubTensorOp should be
+    // executed.
     if (auto newLengthInt = getConstantIntValue(newLength)) {
       hasZeroLen |= *newLengthInt == 0;
     } else {
@@ -824,13 +841,9 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
               ? rewriter.create<AndOp>(loc, check, dynHasZeroLenCond)
               : check;
     }
-    // The number of elements available to read from the source (starting from
-    // the new offset) is `maxRead = srcSize - newOffset`. The original
-    // SubTensorOp may have read a larger number of elements `length > maxRead`.
-    // In that case, the missing number of elements `length - maxRead` must be
-    // paddded. (If `maxRead > length`, more than enough data is available to
-    // read and no high padding is needed.)
-    Value newHigh = max(zero, add(sub(newOffset, srcSize), length));
+    // The amount of high padding is simply the number of elements remaining,
+    // so that the result has the same length as the original SubTensorOp.
+    Value newHigh = sub(sub(length, newLength), newLow);
     appendIndex(newHigh, newHighs, staticNewHighs);
 
     // Only unit stride supported.
diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
index 7d9c770..9eb8c2b 100644
--- a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
+++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
@@ -35,6 +35,44 @@ func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // -----
 
+// CHECK-LABEL: @static_low_pad_only
+// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
+// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: subtensor
+// CHECK: %[[RESULT:.*]] = tensor.generate
+// CHECK: tensor.yield %[[PAD]]
+// CHECK: return %[[RESULT]] : tensor<2x3xf32>
+func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
+    -> tensor<2x3xf32> {
+  %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+    ^bb0(%arg1: index, %arg2: index):
+      linalg.yield %pad : f32
+  } : tensor<4x5xf32> to tensor<14x20xf32>
+  %1 = subtensor %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32>
+  return %1 : tensor<2x3xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @static_low_pad_only_2
+// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
+// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: subtensor
+// CHECK: %[[RESULT:.*]] = tensor.generate
+// CHECK: tensor.yield %[[PAD]]
+// CHECK: return %[[RESULT]] : tensor<1x3xf32>
+func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32)
+    -> tensor<1x3xf32> {
+  %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+    ^bb0(%arg1: index, %arg2: index):
+      linalg.yield %pad : f32
+  } : tensor<4x5xf32> to tensor<14x20xf32>
+  %1 = subtensor %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32>
+  return %1 : tensor<1x3xf32>
+}
+
+// -----
+
 // CHECK-LABEL: @static_mixed_data_high_pad
 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
 // CHECK-NOT: linalg.pad_tensor
@@ -54,6 +92,43 @@ func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // -----
 
+// CHECK-LABEL: @static_mixed_data_low_pad
+// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
+// CHECK-NOT: linalg.pad_tensor
+// CHECK: %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
+// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[1, 3] high[0, 0]
+// CHECK: linalg.yield %[[PAD]]
+// CHECK: return %[[RESULT]] : tensor<3x4xf32>
+func @static_mixed_data_low_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
+    -> tensor<3x4xf32> {
+  %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+    ^bb0(%arg1: index, %arg2: index):
+      linalg.yield %pad : f32
+  } : tensor<4x5xf32> to tensor<14x20xf32>
+  %1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32>
+  return %1 : tensor<3x4xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @static_mixed_data_low_high_pad
+// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
+// CHECK-NOT: linalg.pad_tensor
+// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[ARG0]] low[1, 1] high[2, 3]
+// CHECK: linalg.yield %[[PAD]]
+// CHECK: return %[[RESULT]] : tensor<7x9xf32>
+func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
+    -> tensor<7x9xf32> {
+  %0 = linalg.pad_tensor %arg0 low[2, 3] high[7, 8] {
+    ^bb0(%arg1: index, %arg2: index):
+      linalg.yield %pad : f32
+  } : tensor<4x5xf32> to tensor<13x16xf32>
+  %1 = subtensor %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32>
+  return %1 : tensor<7x9xf32>
+}
+
+// -----
+
 // CHECK-LABEL: @dynamic_high_pad
 // CHECK-SAME: %[[ARG0:.*]]: tensor
 // CHECK-NOT: linalg.pad_tensor
-- 
2.7.4
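For reference, the per-dimension arithmetic introduced by this patch can be replayed on plain integers. The following is a minimal standalone C++ sketch, not part of the patch: the names DimResult and computeDim are illustrative only, and the actual pattern builds MLIR ops through the min/max/add/sub helpers rather than computing integers directly. It checks both dimensions of the @static_mixed_data_low_pad test above.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Per-dimension results of the subtensor(pad_tensor) rewrite.
struct DimResult {
  int64_t newLow, newOffset, newLength, newHigh;
};

// Integer version of the rewritten loop body in
// SubTensorOfPadTensorSwapPattern::matchAndRewrite.
DimResult computeDim(int64_t low, int64_t offset, int64_t length,
                     int64_t srcSize) {
  DimResult r;
  // New low padding: the part of the low padding zone covered by the read.
  r.newLow = std::max<int64_t>(0, low - offset);
  // New read offset into the source, clamped to [0, srcSize].
  r.newOffset = std::min(std::max<int64_t>(offset - low, 0), srcSize);
  // End position of the read within the source, clamped the same way.
  int64_t endLoc =
      std::min(std::max<int64_t>(offset - low + length, 0), srcSize);
  r.newLength = endLoc - r.newOffset;
  // High padding fills whatever remains of the original subtensor length.
  r.newHigh = length - r.newLength - r.newLow;
  return r;
}

int main() {
  // @static_mixed_data_low_pad, dim 0: low = 3, subtensor [2] [3], src = 4.
  DimResult d0 =
      computeDim(/*low=*/3, /*offset=*/2, /*length=*/3, /*srcSize=*/4);
  assert(d0.newLow == 1 && d0.newOffset == 0 && d0.newLength == 2 &&
         d0.newHigh == 0);
  // @static_mixed_data_low_pad, dim 1: low = 7, subtensor [4] [4], src = 5.
  DimResult d1 =
      computeDim(/*low=*/7, /*offset=*/4, /*length=*/4, /*srcSize=*/5);
  assert(d1.newLow == 3 && d1.newOffset == 0 && d1.newLength == 1 &&
         d1.newHigh == 0);
  return 0;
}

The asserted values match the expected rewrite in the CHECK lines of that test: a subtensor %[[ARG0]][0, 0] [2, 1] [1, 1] of the source, followed by linalg.pad_tensor low[1, 3] high[0, 0].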