From 225b960cfcc6091e0d51671f446cce7e00d41756 Mon Sep 17 00:00:00 2001
From: Matthias Springer
Date: Mon, 21 Jun 2021 16:29:42 +0900
Subject: [PATCH] [mlir][linalg] Support low padding in subtensor(pad_tensor) lowering

Differential Revision: https://reviews.llvm.org/D104591
---
 mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp | 59 ++++++++++-------
 .../Dialect/Linalg/subtensor-of-padtensor.mlir    | 75 ++++++++++++++++++++++
 2 files changed, 111 insertions(+), 23 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 4c2df05..45c91c0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -731,9 +731,6 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
   Value padValue = padOp.getConstantPaddingValue();
   if (!padValue)
     return failure();
-  // Only zero low padding supported at the moment.
-  if (!padOp.hasZeroLowPad())
-    return failure();
 
   // Helper variables and functions for various arithmetic operations. These are
   // used extensively for computing new offset/length and padding values.
@@ -788,33 +785,53 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
 
   int64_t rank = padOp.getSourceType().getRank();
   for (unsigned dim = 0; dim < rank; ++dim) {
+    auto low = asValue(rewriter, loc, padOp.getMixedLowPad()[dim]);
     auto offset = asValue(rewriter, loc, subTensorOp.getMixedOffsets()[dim]);
     auto length = asValue(rewriter, loc, subTensorOp.getMixedSizes()[dim]);
     auto srcSize = rewriter.createOrFold<memref::DimOp>(
         loc, padOp.source(), dim);
 
-    // Existing low padding is zero, so new low padding is also zero.
-    Value newLow = zero;
+    // The new amount of low padding is `low - offset`, except for the case
+    // where none of the low padding is read. In that case, the new amount of
+    // low padding is zero.
+    Value newLow = max(zero, sub(low, offset));
     appendIndex(newLow, newLows, staticNewLows);
 
-    // There is no low padding, so the offset remains unchanged. Except for the
-    // case where the SubTensorOp starts reading from a position within the high
-    // padding. In that case, set the offset to the end of source tensor. The
-    // new SubTensorOp length will be zero in that case. (Effectively reading no
+    // Start reading the data from position `offset - low`. Since the original
+    // read may have started in the low padding zone, this value could be
+    // negative. Therefore, start reading from:
+    //
+    //   max(offset - low, 0)
+    //
+    // The original read could also have started in the high padding zone.
+    // In that case, set the offset to the end of the source tensor. The new
+    // SubTensorOp length will be zero in that case. (Effectively reading no
     // data from the source.)
-    Value newOffset = min(offset, srcSize);
+    Value newOffset = min(max(sub(offset, low), zero), srcSize);
     newOffsets.push_back(asOpFoldResult(rewriter, newOffset));
 
-    // The new SubTensorOp starts reading at `newOffset` and reads until
-    // `offset + length`. This position may be outside of the source (i.e.,
-    // within the high padding). In that case, read only until the end of the
-    // source. In mathematical terms:
+    // The original SubTensorOp was reading until position `offset + length`.
+    // Therefore, the corresponding position within the source tensor is:
+    //
+    //   offset + length - low
     //
-    //   endLoc = min(offset + length, srcSize)
+    // In case the original SubTensorOp stopped reading within the low padding
+    // zone, this value can be negative. In that case, the end position of
+    // the read should be zero. (Similar to newOffset.)
+    //
+    // The original read could also have stopped in the high padding zone. In
+    // that case, the end position of the read should be the end of the
+    // source tensor. (Similar to newOffset.)
+    //
+    //   endLoc = min(max(offset - low + length, 0), srcSize)
     //
     // The new SubTensorOp length is `endLoc - newOffset`.
-    Value newLength = sub(min(add(offset, length), srcSize), newOffset);
+    Value endLoc = min(max(add(sub(offset, low), length), zero), srcSize);
+    Value newLength = sub(endLoc, newOffset);
     newLengths.push_back(asOpFoldResult(rewriter, newLength));
+
+    // Check if newLength is zero. In that case, no SubTensorOp should be
+    // executed.
     if (auto newLengthInt = getConstantIntValue(newLength)) {
       hasZeroLen |= *newLengthInt == 0;
     } else {
@@ -824,13 +841,9 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
               ? rewriter.create<AndOp>(loc, check, dynHasZeroLenCond)
               : check;
     }
-    // The number of elements available to read from the source (starting from
-    // the new offset) is `maxRead = srcSize - newOffset`. The original
-    // SubTensorOp may have read a larger number of elements `length > maxRead`.
-    // In that case, the missing number of elements `length - maxRead` must be
-    // paddded. (If `maxRead > length`, more than enough data is available to
-    // read and no high padding is needed.)
-    Value newHigh = max(zero, add(sub(newOffset, srcSize), length));
+    // The amount of high padding is simply the number of elements remaining,
+    // so that the result has the same length as the original SubTensorOp.
+    Value newHigh = sub(sub(length, newLength), newLow);
     appendIndex(newHigh, newHighs, staticNewHighs);
 
     // Only unit stride supported.
diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
index 7d9c770..9eb8c2b 100644
--- a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
+++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
@@ -35,6 +35,44 @@ func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // -----
 
+// CHECK-LABEL: @static_low_pad_only
+// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
+// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: subtensor
+// CHECK: %[[RESULT:.*]] = tensor.generate
+// CHECK: tensor.yield %[[PAD]]
+// CHECK: return %[[RESULT]] : tensor<2x3xf32>
+func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
+    -> tensor<2x3xf32> {
+  %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+    ^bb0(%arg1: index, %arg2: index):
+      linalg.yield %pad : f32
+  } : tensor<4x5xf32> to tensor<14x20xf32>
+  %1 = subtensor %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32>
+  return %1 : tensor<2x3xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @static_low_pad_only_2
+// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
+// CHECK-NOT: linalg.pad_tensor
+// CHECK-NOT: subtensor
+// CHECK: %[[RESULT:.*]] = tensor.generate
+// CHECK: tensor.yield %[[PAD]]
+// CHECK: return %[[RESULT]] : tensor<1x3xf32>
+func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32)
+    -> tensor<1x3xf32> {
+  %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+    ^bb0(%arg1: index, %arg2: index):
+      linalg.yield %pad : f32
+  } : tensor<4x5xf32> to tensor<14x20xf32>
+  %1 = subtensor %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32>
+  return %1 : tensor<1x3xf32>
+}
+
+// -----
+
 // CHECK-LABEL: @static_mixed_data_high_pad
 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
 // CHECK-NOT: linalg.pad_tensor
@@ -54,6 +92,43 @@ func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // -----
 
+// CHECK-LABEL: @static_mixed_data_low_pad
+// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
+// CHECK-NOT: linalg.pad_tensor
+// CHECK: %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
+// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[1, 3] high[0, 0]
+// CHECK: linalg.yield %[[PAD]]
+// CHECK: return %[[RESULT]] : tensor<3x4xf32>
+func @static_mixed_data_low_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
+    -> tensor<3x4xf32> {
+  %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+    ^bb0(%arg1: index, %arg2: index):
+      linalg.yield %pad : f32
+  } : tensor<4x5xf32> to tensor<14x20xf32>
+  %1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32>
+  return %1 : tensor<3x4xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @static_mixed_data_low_high_pad
+// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
+// CHECK-NOT: linalg.pad_tensor
+// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[ARG0]] low[1, 1] high[2, 3]
+// CHECK: linalg.yield %[[PAD]]
+// CHECK: return %[[RESULT]] : tensor<7x9xf32>
+func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
+    -> tensor<7x9xf32> {
+  %0 = linalg.pad_tensor %arg0 low[2, 3] high[7, 8] {
+    ^bb0(%arg1: index, %arg2: index):
+      linalg.yield %pad : f32
+  } : tensor<4x5xf32> to tensor<13x16xf32>
+  %1 = subtensor %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32>
+  return %1 : tensor<7x9xf32>
+}
+
+// -----
+
 // CHECK-LABEL: @dynamic_high_pad
 // CHECK-SAME: %[[ARG0:.*]]: tensor
 // CHECK-NOT: linalg.pad_tensor
-- 
2.7.4
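For reference, the per-dimension arithmetic introduced by this patch can be replayed on plain integers. The following is a minimal standalone C++ sketch, not part of the patch: the names DimResult and computeDim are illustrative only, and the actual pattern builds MLIR ops through the min/max/add/sub helpers rather than computing integers directly. It checks both dimensions of the @static_mixed_data_low_pad test above.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Per-dimension results of the subtensor(pad_tensor) rewrite.
struct DimResult {
  int64_t newLow, newOffset, newLength, newHigh;
};

// Integer version of the rewritten loop body in
// SubTensorOfPadTensorSwapPattern::matchAndRewrite.
DimResult computeDim(int64_t low, int64_t offset, int64_t length,
                     int64_t srcSize) {
  DimResult r;
  // New low padding: the part of the low padding zone covered by the read.
  r.newLow = std::max<int64_t>(0, low - offset);
  // New read offset into the source, clamped to [0, srcSize].
  r.newOffset = std::min(std::max<int64_t>(offset - low, 0), srcSize);
  // End position of the read within the source, clamped the same way.
  int64_t endLoc =
      std::min(std::max<int64_t>(offset - low + length, 0), srcSize);
  r.newLength = endLoc - r.newOffset;
  // High padding fills whatever remains of the original subtensor length.
  r.newHigh = length - r.newLength - r.newLow;
  return r;
}

int main() {
  // @static_mixed_data_low_pad, dim 0: low = 3, subtensor [2] [3], src = 4.
  DimResult d0 =
      computeDim(/*low=*/3, /*offset=*/2, /*length=*/3, /*srcSize=*/4);
  assert(d0.newLow == 1 && d0.newOffset == 0 && d0.newLength == 2 &&
         d0.newHigh == 0);
  // @static_mixed_data_low_pad, dim 1: low = 7, subtensor [4] [4], src = 5.
  DimResult d1 =
      computeDim(/*low=*/7, /*offset=*/4, /*length=*/4, /*srcSize=*/5);
  assert(d1.newLow == 3 && d1.newOffset == 0 && d1.newLength == 1 &&
         d1.newHigh == 0);
  return 0;
}

The asserted values match the expected rewrite in the CHECK lines of that test: a subtensor %[[ARG0]][0, 0] [2, 1] [1, 1] of the source, followed by linalg.pad_tensor low[1, 3] high[0, 0].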