auto sourceType = padOp.getSourceType();
- // Copy of source with static shape can be vectorized.
- if (sourceType.hasStaticShape()) {
- auto vecType = VectorType::get(sourceType.getShape(),
- sourceType.getElementType());
- vectorizeStaticShapeSource(rewriter, padOp, fill, vecType);
+ // Try vectorizing the copy of source.
+ if (tryVectorizeCopy(rewriter, padOp, padValue, fill).succeeded())
return success();
- }
-
- // TODO: Vectorize dynamic source but static destination.
// Neither source type nor PadTensorOp result type have static shape. Such
// PadTensorOps cannot be vectorized. Generate a SubTensorInsertOp instead.
return success();
}
- /// Vectorize the copying of a PadTensorOp's source that has static shape.
- void vectorizeStaticShapeSource(PatternRewriter &rewriter, PadTensorOp padOp,
- Value dest, VectorType vecType) const {
+ /// Vectorize the copying of a PadTensorOp's source. This is possible if each
+ /// dimension size is statically known in the source type or the result type
+ /// (or both). Returns failure() if some dimension is dynamic in both types.
+ LogicalResult tryVectorizeCopy(PatternRewriter &rewriter, PadTensorOp padOp,
+ Value padValue, Value dest) const {
+ auto sourceType = padOp.getSourceType();
+ auto resultType = padOp.getResultType();
+
+ SmallVector<int64_t> vecShape;
+ SmallVector<bool> readInBounds;
+ SmallVector<bool> writeInBounds;
+ for (unsigned i = 0; i < sourceType.getRank(); ++i) {
+ if (!sourceType.isDynamicDim(i)) {
+ vecShape.push_back(sourceType.getDimSize(i));
+ // Source shape is statically known: Neither read nor write is out-of-
+ // bounds.
+ readInBounds.push_back(true);
+ writeInBounds.push_back(true);
+ } else if (!resultType.isDynamicDim(i)) {
+ // Source shape is not statically known, but result shape is. Vectorize
+ // with size of result shape. This may be larger than the source size.
+ vecShape.push_back(resultType.getDimSize(i));
+ // Read may be out-of-bounds because the result size could be larger
+ // than the source size.
+ readInBounds.push_back(false);
+ // Write is out-of-bounds if low padding > 0.
+ writeInBounds.push_back(
+ isEqualConstantIntOrValue(padOp.getMixedLowPad()[i],
+ rewriter.getIndexAttr(0)));
+ } else {
+ // Neither source nor result dim of padOp is static. Cannot vectorize
+ // the copy.
+ return failure();
+ }
+ }
+ auto vecType = VectorType::get(vecShape, sourceType.getElementType());
+
// Generate TransferReadOp.
SmallVector<Value> readIndices(
vecType.getRank(), rewriter.create<ConstantIndexOp>(padOp.getLoc(), 0));
auto read = rewriter.create<vector::TransferReadOp>(
- padOp.getLoc(), vecType, padOp.source(), readIndices);
+ padOp.getLoc(), vecType, padOp.source(), readIndices, padValue,
+ readInBounds);
- // Generate TransferWriteOp. The destination dimensions may be dynamic, but
- // the write cannot be out-of-bounds. (A large enough destination tensor is
- // allocated in this pattern.)
+ // Generate TransferWriteOp. The write starts at the low padding offsets.
auto writeIndices = ofrToIndexValues(
rewriter, padOp.getLoc(), padOp.getMixedLowPad());
- SmallVector<bool> inBounds(vecType.getRank(), true);
rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
- padOp, read, dest, writeIndices, inBounds);
+ padOp, read, dest, writeIndices, writeInBounds);
+
+ return success();
}
};
// CHECK-LABEL: func @pad_static(
// CHECK-SAME: %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32
// CHECK-NOT: linalg.pad_tensor
-// CHECK-DAG: %[[C1:.*]] = constant 1 : index
+// CHECK-DAG: %[[C0:.*]] = constant 0 : index
+// CHECK-DAG: %[[C2:.*]] = constant 2 : index
// CHECK-DAG: %[[INIT:.*]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
// CHECK-DAG: %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x3x4xf32>
// CHECK: %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]]{{.*}} : vector<2x3x4xf32>, tensor<2x3x4xf32>
-// CHECK-DAG: %[[DIM1:.*]] = memref.dim %[[ARG0]], %[[C1]]
-// CHECK: %[[RESULT:.*]] = subtensor_insert %[[ARG0]] into %2[0, 0, 2] [2, %[[DIM1]], 2] [1, 1, 1] : tensor<2x?x2xf32> into tensor<2x3x4xf32>
+// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32>
+// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32>
// CHECK: return %[[RESULT]]
func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
%0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {