tensor.empty/linalg.init_tensor produces an uninitialized tensor that can be used as a destination operand for destination-style ops (ops that implement `DestinationStyleOpInterface`).
This change makes it possible to implement `TilingInterface` for non-destination-style ops without depending on the Linalg dialect.
RFC: https://discourse.llvm.org/t/rfc-add-tensor-from-shape-operation/65101
Differential Revision: https://reviews.llvm.org/D135129
`fill` with arbitrary ranked output tensors:
```python
-tensor_2d = linalg.InitTensorOp([4, 8], f32)
-tensor_3d = linalg.InitTensorOp([4, 8, 16], f32)
+tensor_2d = tensor.EmptyOp([4, 8], f32)
+tensor_3d = tensor.EmptyOp([4, 8, 16], f32)
fill(value, outs=[tensor_2d])
fill(value, outs=[tensor_3d])
```
class Linalg_Op<string mnemonic, list<Trait> traits = []> :
Op<Linalg_Dialect, mnemonic, traits>;
-def Linalg_InitTensorOp : Linalg_Op<"init_tensor",
- [NoSideEffect,
- DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>]> {
- let summary = "operation to define a tensor of particular shape";
-
- let description = [{
- `linalg.init_tensor` is an operation that defines a tensor of a particular
- shape. The shape could be dynamic or static. The contents of the tensor are
- unspecified and the only purpose of the op result is to materialize the
- specified shape in IR and make it available to other transformations.
-
- Note: This op can be lowered to a `bufferization.alloc_tensor`, at which
- point it turns into an explicit buffer allocation.
- }];
-
- let arguments =
- (ins Variadic<Index>:$sizes, I64ArrayAttr:$static_sizes);
-
- let results = (outs AnyTensor:$result);
-
- let assemblyFormat = [{
- custom<DynamicIndexList>($sizes, $static_sizes,
- "ShapedType::kDynamicSize")
- attr-dict `:` type($result)
- }];
-
- let extraClassDeclaration = [{
- static StringRef getStaticSizesAttrStrName() {
- return "static_sizes";
- }
-
- RankedTensorType getType() {
- return getResult().getType().cast<RankedTensorType>(); }
-
- // Infer the shape of the result tensor given the static shapes
- // and element type of the result tensor.
- static Type inferResultType(ArrayRef<int64_t> staticSizes, Type elementType,
- Attribute encoding = {});
-
- // Return true if the size of the tensor is dynamic at `idx`
- bool isDynamicSize(unsigned idx) {
- APInt v = *(getStaticSizes().getAsValueRange<IntegerAttr>().begin() + idx);
- return ShapedType::isDynamic(v.getSExtValue());
- }
-
- // Assert that the size of the result tensor is static at `idx`
- // and return the shape.
- int64_t getStaticSize(unsigned idx) {
- assert(!isDynamicSize(idx) && "expected static size");
- APInt v = *(getStaticSizes().
- template getAsValueRange<IntegerAttr>().begin() + idx);
- return v.getSExtValue();
- }
-
- // Return the argument position that contains the dynamic size of
- // the tensor at dimension `idx`. Asserts that the shape is
- // dynamic at that `idx`.
- unsigned getIndexOfDynamicSize(unsigned idx) {
- assert(isDynamicSize(idx) && "expected dynamic size");
- return std::count_if(
- getStaticSizes().getValue().begin(),
- getStaticSizes().getValue().begin() + idx,
- [&](Attribute attr) {
- return ShapedType::isDynamic(attr.cast<IntegerAttr>().getInt());
- });
- }
-
- // Return both static and dynamic sizes as a list of `OpFoldResult`.
- SmallVector<OpFoldResult> getMixedSizes();
-
- // Return the Value of the dynamic size of the tensor at dimension
- // `idx`. Asserts that the shape is dynamic at that `idx.
- Value getDynamicSize(unsigned idx) {
- return getOperand(getIndexOfDynamicSize(idx));
- }
- }];
-
- let builders = [
- OpBuilder<(ins "ValueRange":$shape,
- "ArrayRef<int64_t>":$staticShape, "Type":$elementType),
- [{
- build($_builder, $_state,
- InitTensorOp::inferResultType(staticShape, elementType),
- shape, $_builder.getI64ArrayAttr(staticShape));
- }]>,
- OpBuilder<(ins "ValueRange":$shape, "Type":$elementType),
- [{
- SmallVector<int64_t, 4> staticShape(
- shape.size(), ShapedType::kDynamicSize);
- build($_builder, $_state, shape, staticShape, elementType);
- }]>,
- OpBuilder<(ins "ArrayRef<int64_t>":$staticShape, "Type":$elementType),
- [{
- build($_builder, $_state, ValueRange{}, staticShape, elementType);
- }]>,
- OpBuilder<(ins "ArrayRef<OpFoldResult>":$sizes, "Type":$elementType,
- CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
- ];
-
- let hasCanonicalizer = 1;
- let hasCustomAssemblyFormat = 1;
- let hasVerifier = 1;
-}
-
def Linalg_YieldOp : Linalg_Op<"yield", [NoSideEffect, ReturnLike, Terminator]>,
Arguments<(ins Variadic<AnyType>:$values)> {
let summary = "Linalg yield operation";
std::unique_ptr<OperationPass<func::FuncOp>>
createConvertLinalgToAffineLoopsPass();
-/// Create a pass that rewrites init_tensor to alloc_tensor.
-std::unique_ptr<Pass> createLinalgInitTensorToAllocTensorPass();
+/// Create a pass that rewrites tensor.empty to bufferization.alloc_tensor.
+std::unique_ptr<Pass> createEmptyTensorToAllocTensorPass();
/// Create a pass to convert Linalg operations which work on tensors to use
/// buffers instead.
let dependentDialects = ["linalg::LinalgDialect", "memref::MemRefDialect"];
}
-def LinalgInitTensorToAllocTensor : Pass<"linalg-init-tensor-to-alloc-tensor"> {
- let summary = "Replace all init_tensor ops by alloc_tensor ops.";
+def EmptyTensorToAllocTensor : Pass<"empty-tensor-to-alloc-tensor"> {
+ let summary = "Replace all empty ops by alloc_tensor ops.";
let description = [{
- init_tensor ops return a tensor of unspecified contents who's only purpose
+ tensor.empty ops return a tensor of unspecified contents whose only purpose
is to carry the tensor shape. This pass converts such ops to
bufferization.alloc_tensor ops, which bufferize to buffer allocations.
}];
- let constructor = "mlir::createLinalgInitTensorToAllocTensorPass()";
+ let constructor = "mlir::createEmptyTensorToAllocTensorPass()";
}
def LinalgFoldUnitExtentDims : Pass<"linalg-fold-unit-extent-dims", ""> {
```
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
- %1 = linalg.init_tensor [4] : tensor<4xf32>
+ %1 = tensor.empty() : tensor<4xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
%3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
#map4 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map5 = affine_map<(d0, d1, d2) -> (d0, d1)>
- %0 = linalg.init_tensor [16, 32, 64] : tensor<16x32x64xf32>
+ %0 = tensor.empty() : tensor<16x32x64xf32>
%cst = arith.constant 0.000000e+00 : f32
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) ->
tensor<16x32x64xf32>
- %2 = linalg.init_tensor [64, 4] : tensor<64x4xi1>
+ %2 = tensor.empty() : tensor<64x4xi1>
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
///
/// ```
/// scf.for (%i) {
-/// %packed_init = linalg.init_tensor range(%j) : tensor<?x4x8xf32>
+/// %packed_init = tensor.empty range(%j) : tensor<?x4x8xf32>
/// %packed = scf.for (%k) iter_args(%p : %packed_init) {
/// %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
/// %0 = tensor.pad %st0 low[0, 0] high[...] {
using OptimizeCopyFn =
std::function<LogicalResult(PatternRewriter &, tensor::PadOp, Value)>;
-/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and
+/// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and
/// InsertSliceOp. For now, only constant padding values are supported.
/// `OptimizeCopyFn` can be used to customize copying step optimization.
struct GeneralizePadOpPattern : public OpRewritePattern<tensor::PadOp> {
/// ```
/// %cst = arith.constant 0.000000e+00 : f32
/// %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
-/// %1 = linalg.init_tensor [4] : tensor<4xf32>
+/// %1 = tensor.empty() : tensor<4xf32>
/// %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
/// %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
/// affine_map<(d0, d1) -> (d0)>],
/// #map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
/// #map4 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
/// #map5 = affine_map<(d0, d1, d2) -> (d0, d1)>
-/// %0 = linalg.init_tensor [16, 32, 64] : tensor<16x32x64xf32>
+/// %0 = tensor.empty() : tensor<16x32x64xf32>
/// %cst = arith.constant 0.000000e+00 : f32
/// %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) ->
/// tensor<16x32x64xf32>
-/// %2 = linalg.init_tensor [64, 4] : tensor<64x4xi1>
+/// %2 = tensor.empty() : tensor<64x4xi1>
///
/// %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
/// iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
let hasConstantMaterializer = 1;
let dependentDialects = [
+ "AffineDialect",
"arith::ArithDialect",
"complex::ComplexDialect",
];
}
//===----------------------------------------------------------------------===//
+// EmptyOp
+//===----------------------------------------------------------------------===//
+
+def Tensor_EmptyOp : Tensor_Op<"empty",
+ [NoSideEffect,
+ DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>]> {
+ let summary = "empty tensor operation";
+
+ let description = [{
+ `tensor.empty` is an operation that defines a tensor of a particular shape.
+ The shape could be dynamic or static. The contents of the tensor are
+ unspecified and the only purpose of the op result is to materialize the
+ specified shape in IR and make it available to other transformations.
+
+ `tensor.empty` is useful in transformations that expect destination style
+ ops. I.e., ops that implement `DestinationStyleOpInterface`. Ops that are
+ not in destination style can be made compatible with such transformations
+ with a `tensor.empty` destination.
+
+ Note: This op can be lowered to a `bufferization.alloc_tensor`, at which
+ point it turns into an explicit buffer allocation.
+ }];
+
+ let arguments = (ins Variadic<Index>:$dynamicSizes);
+
+ let results = (outs AnyRankedTensor:$result);
+
+ let assemblyFormat = "`(`$dynamicSizes`)` attr-dict `:` type($result)";
+
+ let extraClassDeclaration = [{
+ RankedTensorType getType() {
+ return getResult().getType().cast<RankedTensorType>();
+ }
+
+ // Return both static and dynamic sizes as a list of `OpFoldResult`.
+ SmallVector<OpFoldResult> getMixedSizes();
+
+ // Return the Value of the dynamic size of the tensor at dimension `idx`.
+ // Asserts that the shape is dynamic at that `idx`.
+ Value getDynamicSize(unsigned idx);
+ }];
+
+ let builders = [
+ // Build with fully static sizes.
+ OpBuilder<(ins "ArrayRef<int64_t>":$staticShape, "Type":$elementType)>,
+
+ // Build with mixed static/dynamic sizes.
+ OpBuilder<(ins "ArrayRef<int64_t>":$staticShape, "Type":$elementType,
+ "ValueRange":$dynamicSizes)>,
+
+ // Build with mixed static/dynamic sizes.
+ OpBuilder<(ins "ArrayRef<OpFoldResult>":$sizes, "Type":$elementType)>
+ ];
+
+ let hasCanonicalizer = 1;
+ let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
// ExtractOp
//===----------------------------------------------------------------------===//
/// We can construct %2 by generating the following, which only uses `%0`:
///
/// ```
-/// %dest = linalg.init_tensor [%size0, %size1] : tensor<?x?xf32>
+/// %dest = tensor.empty(%size0, %size1) : tensor<?x?xf32>
/// %1 = tensor.dim %0, %c1 : tensor<3x?x?x11x?xf32>
/// %2 = tensor.dim %0, %c2 : tensor<3x?x?x11x?xf32>
/// %3 = tensor.dim %0, %c4 : tensor<3x?x?x11x?xf32>
/// ```
/// This class helps build the below IR to replace %2:
/// ```
-/// %dest = linalg.init_tensor() : tensor<10x10xf32>
+/// %dest = tensor.empty() : tensor<10x10xf32>
/// %2 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%arg0) -> tensor<10x10xf32> {
/// %linear_index = affine.apply affine_map<(d0)[]->(d0*2 + 11)>(%iv)
/// %3:3 = arith.delinearize_index %iv into (3, 7, 11)
bodyArgTypes.emplace_back(getElementTypeOrSelf(in.getType()));
SmallVector<Type> opResultTypes;
- SmallVector<Value> initTensors;
+ SmallVector<Value> emptyTensors;
SmallVector<Value> dynDims;
dynDims.resize(results.front().getType().cast<ShapedType>().getRank());
for (auto result : results) {
auto resultTy = result.getType().template cast<ShapedType>();
- initTensors.push_back(rewriter.create<linalg::InitTensorOp>(
- loc, filteredDims, resultTy.getShape(), resultTy.getElementType()));
+ emptyTensors.push_back(rewriter.create<tensor::EmptyOp>(
+ loc, resultTy.getShape(), resultTy.getElementType(), filteredDims));
opResultTypes.push_back(result.getType());
}
auto bodyResultTypes = llvm::to_vector<4>(llvm::map_range(
- initTensors, [](Value v) { return getElementTypeOrSelf(v); }));
+ emptyTensors, [](Value v) { return getElementTypeOrSelf(v); }));
SmallVector<Value, 2> operands;
SmallVector<AffineMap, 2> indexingMaps;
bool didEncounterError = false;
auto linalgOp = rewriter.create<linalg::GenericOp>(
- loc, opResultTypes, operands, initTensors, indexingMaps,
+ loc, opResultTypes, operands, emptyTensors, indexingMaps,
getNParallelLoopsAttrs(rank),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) {
Value opResult = createLinalgBodyCalculationForElementwiseOp(
Type reduceTy = RankedTensorType::get(reduceShape, resultTy.getElementType());
// First fill the output buffer with the init value.
- auto initTensor = rewriter
- .create<linalg::InitTensorOp>(loc, dynDims, reduceShape,
- resultTy.getElementType())
- .getResult();
+ auto emptyTensor =
+ rewriter
+ .create<tensor::EmptyOp>(loc, reduceShape, resultTy.getElementType(),
+ dynDims)
+ .getResult();
auto fillValueAttr = createInitialValueForReduceOp(op, elementTy, rewriter);
if (!fillValueAttr)
auto fillValue = rewriter.create<arith::ConstantOp>(loc, fillValueAttr);
auto filledTensor = rewriter
.create<linalg::FillOp>(loc, ValueRange{fillValue},
- ValueRange{initTensor})
+ ValueRange{emptyTensor})
.result();
SmallVector<AffineExpr, 2> srcExprs;
SmallVector<Value> filteredDims = condenseValues(dynDims);
- auto initTensor = rewriter.create<linalg::InitTensorOp>(
- loc, filteredDims, resultTy.getShape(), resultTy.getElementType());
+ auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, resultTy.getShape(), resultTy.getElementType(), filteredDims);
SmallVector<AffineMap, 2> affineMaps = {
AffineMap::get(resultTy.getRank(), /*symbolCount=*/0, inputExprs,
rewriter.getMultiDimIdentityMap(resultTy.getRank())};
rewriter.replaceOpWithNewOp<linalg::GenericOp>(
- op, resultTy, op.getInput1(), ValueRange{initTensor}, affineMaps,
+ op, resultTy, op.getInput1(), ValueRange{emptyTensor}, affineMaps,
getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
nestedBuilder.create<linalg::YieldOp>(loc, *args.begin());
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(rank));
// Construct the indexing maps needed for linalg.generic ops.
- Value initTensor = rewriter.create<linalg::InitTensorOp>(
- loc, ArrayRef<Value>({dynDims}), outputTy.getShape(),
- outputTy.getElementType());
+ Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, outputTy.getShape(), outputTy.getElementType(),
+ ArrayRef<Value>({dynDims}));
auto linalgOp = rewriter.create<linalg::GenericOp>(
- loc, outputTy, genericInputs, ValueRange{initTensor}, indexingMaps,
+ loc, outputTy, genericInputs, ValueRange{emptyTensor}, indexingMaps,
getNParallelLoopsAttrs(rank),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange blockArgs) {
if (op.getMode() != "NEAREST_NEIGHBOR" && op.getMode() != "BILINEAR")
return failure();
- auto initTensor = rewriter.create<linalg::InitTensorOp>(
- loc, dynamicDims, resultTy.getShape(), resultElementTy);
+ auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, resultTy.getShape(), resultElementTy, dynamicDims);
SmallVector<AffineMap, 2> affineMaps = {
rewriter.getMultiDimIdentityMap(resultTy.getRank())};
auto genericOp = rewriter.create<linalg::GenericOp>(
- loc, resultTy, ValueRange({}), ValueRange{initTensor}, affineMaps,
+ loc, resultTy, ValueRange({}), ValueRange{emptyTensor}, affineMaps,
getNParallelLoopsAttrs(resultTy.getRank()));
rewriter.replaceOp(op, genericOp.getResult(0));
}
sizes[axis] = resultDimSize;
- Value init = rewriter.create<linalg::InitTensorOp>(
- loc, dynDims, resultType.getShape(), resultType.getElementType());
+ Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, resultType.getShape(), resultType.getElementType(), dynDims);
Value zeroVal = rewriter.createOrFold<arith::ConstantOp>(
loc, rewriter.getZeroAttr(resultType.getElementType()));
- Value result =
- rewriter
- .create<linalg::FillOp>(loc, ValueRange{zeroVal}, ValueRange{init})
- .result();
+ Value result = rewriter
+ .create<linalg::FillOp>(loc, ValueRange{zeroVal},
+ ValueRange{emptyTensor})
+ .result();
auto toOpFoldResult = [](Value v) -> OpFoldResult {
auto op = v.getDefiningOp<arith::ConstantIndexOp>();
Value axisDimSize = rewriter.create<tensor::DimOp>(loc, input, axis);
// First fill the output buffer with the init value.
- auto initTensor = rewriter
- .create<linalg::InitTensorOp>(
- loc, ArrayRef<Value>({dynDims}),
- inputTy.getShape(), inputTy.getElementType())
- .getResult();
+ auto emptyTensor = rewriter
+ .create<tensor::EmptyOp>(loc, inputTy.getShape(),
+ inputTy.getElementType(),
+ ArrayRef<Value>({dynDims}))
+ .getResult();
SmallVector<AffineMap, 2> affineMaps = {
rewriter.getMultiDimIdentityMap(resultTy.getRank())};
rewriter.replaceOpWithNewOp<linalg::GenericOp>(
- op, resultTy, ArrayRef<Value>({}), ValueRange{initTensor}, affineMaps,
+ op, resultTy, ArrayRef<Value>({}), ValueRange{emptyTensor}, affineMaps,
getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
llvm::SmallVector<Value> indices;
}
}
- auto initTensor = rewriter.create<linalg::InitTensorOp>(
- op.getLoc(), dynDims, genericShape, elementTy);
+ auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+ op.getLoc(), genericShape, elementTy, dynDims);
// We need to map the input shape to the non-broadcasted dimensions.
SmallVector<AffineExpr, 4> dimExprs;
auto genericOp = rewriter.create<linalg::GenericOp>(
loc, RankedTensorType::get(genericShape, elementTy), input,
- ValueRange{initTensor}, affineMaps,
+ ValueRange{emptyTensor}, affineMaps,
getNParallelLoopsAttrs(genericShape.size()),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
nestedBuilder.create<linalg::YieldOp>(op.getLoc(), *args.begin());
}
// First fill the output buffer for the index.
- auto initTensorIdx =
- rewriter
- .create<linalg::InitTensorOp>(loc, dynDims, resultTy.getShape(),
- outElementTy)
- .getResult();
+ auto emptyTensorIdx = rewriter
+ .create<tensor::EmptyOp>(loc, resultTy.getShape(),
+ outElementTy, dynDims)
+ .getResult();
auto fillValueIdx = rewriter.create<arith::ConstantOp>(
loc, rewriter.getIntegerAttr(outElementTy, 0));
auto filledTensorIdx =
rewriter
.create<linalg::FillOp>(loc, ValueRange{fillValueIdx},
- ValueRange{initTensorIdx})
+ ValueRange{emptyTensorIdx})
.result();
// Second fill the output buffer for the running max.
- auto initTensorMax = rewriter
- .create<linalg::InitTensorOp>(
- loc, dynDims, resultTy.getShape(), inElementTy)
- .getResult();
+ auto emptyTensorMax = rewriter
+ .create<tensor::EmptyOp>(loc, resultTy.getShape(),
+ inElementTy, dynDims)
+ .getResult();
auto fillValueMaxAttr =
createInitialValueForReduceOp(argmaxOp, inElementTy, rewriter);
auto filledTensorMax =
rewriter
.create<linalg::FillOp>(loc, ValueRange{fillValueMax},
- ValueRange{initTensorMax})
+ ValueRange{emptyTensorMax})
.result();
// We need to reduce along the arg-max axis, with parallel operations along
auto loc = op.getLoc();
- auto initTensor =
+ auto emptyTensor =
rewriter
- .create<linalg::InitTensorOp>(loc, dynamicDims, resultTy.getShape(),
- resultElementTy)
+ .create<tensor::EmptyOp>(loc, resultTy.getShape(), resultElementTy,
+ dynamicDims)
.getResult();
SmallVector<AffineMap, 2> affineMaps = {
auto genericOp = rewriter.create<linalg::GenericOp>(
loc, ArrayRef<Type>({resultTy}), ValueRange{indices},
- ValueRange{initTensor}, affineMaps,
+ ValueRange{emptyTensor}, affineMaps,
getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &b, Location loc, ValueRange args) {
auto indexValue = args[0];
}
}
- auto initTensor =
- rewriter
- .create<linalg::InitTensorOp>(loc, dynDims, resultTy.getShape(),
- resultElementTy)
- .getResult();
+ auto emptyTensor = rewriter
+ .create<tensor::EmptyOp>(loc, resultTy.getShape(),
+ resultElementTy, dynDims)
+ .getResult();
SmallVector<AffineMap, 2> affineMaps = {
rewriter.getMultiDimIdentityMap(resultTy.getRank()),
rewriter.getMultiDimIdentityMap(resultTy.getRank())};
auto genericOp = rewriter.create<linalg::GenericOp>(
- loc, resultTy, ValueRange({input}), ValueRange{initTensor}, affineMaps,
+ loc, resultTy, ValueRange({input}), ValueRange{emptyTensor}, affineMaps,
getNParallelLoopsAttrs(resultTy.getRank()));
rewriter.replaceOp(op, genericOp.getResult(0));
weightPermValue);
Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy);
- Value initTensor = rewriter.create<linalg::InitTensorOp>(
- loc, filteredDims, resultTy.getShape(), resultETy);
+ Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, resultTy.getShape(), resultETy, filteredDims);
Value zero = rewriter.create<arith::ConstantOp>(loc, resultZeroAttr);
Value zeroTensor = rewriter
.create<linalg::FillOp>(loc, ValueRange{zero},
- ValueRange{initTensor})
+ ValueRange{emptyTensor})
.result();
// Extract the attributes for convolution.
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank()));
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank()));
- Value biasInitTensor = rewriter.create<linalg::InitTensorOp>(
- loc, filteredDims, resultTy.getShape(), resultETy);
+ Value biasEmptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, resultTy.getShape(), resultETy, filteredDims);
if (isQuantized) {
auto quantizationInfo =
Value result =
rewriter
.create<linalg::GenericOp>(
- loc, resultTy, ValueRange({bias, conv}), biasInitTensor,
+ loc, resultTy, ValueRange({bias, conv}), biasEmptyTensor,
indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
Value result =
rewriter
.create<linalg::GenericOp>(
- loc, resultTy, ValueRange({bias, conv}), biasInitTensor,
+ loc, resultTy, ValueRange({bias, conv}), biasEmptyTensor,
indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultRank));
Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy);
- Value initTensor = rewriter.create<linalg::InitTensorOp>(
- loc, filteredDims, linalgConvTy.getShape(), resultETy);
+ Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, linalgConvTy.getShape(), resultETy, filteredDims);
Value zero = rewriter.create<arith::ConstantOp>(loc, resultZeroAttr);
Value zeroTensor = rewriter
.create<linalg::FillOp>(loc, ValueRange{zero},
- ValueRange{initTensor})
+ ValueRange{emptyTensor})
.result();
- Value biasInitTensor = rewriter.create<linalg::InitTensorOp>(
- loc, filteredDims, resultTy.getShape(), resultETy);
+ Value biasEmptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, resultTy.getShape(), resultETy, filteredDims);
if (!isQuantized) {
Value conv = rewriter
.create<linalg::DepthwiseConv2DNhwcHwcmOp>(
rewriter
.create<linalg::GenericOp>(
loc, resultTy, ValueRange({bias, convReshape}),
- biasInitTensor, indexingMaps,
+ biasEmptyTensor, indexingMaps,
getNParallelLoopsAttrs(resultRank),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
rewriter
.create<linalg::GenericOp>(
loc, resultTy, ValueRange({bias, convReshape}),
- biasInitTensor, indexingMaps,
+ biasEmptyTensor, indexingMaps,
getNParallelLoopsAttrs(resultRank),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
auto zeroAttr = rewriter.getZeroAttr(outputElementTy);
Value zero = rewriter.create<arith::ConstantOp>(loc, zeroAttr);
- auto initTensor = rewriter.create<linalg::InitTensorOp>(
- loc, filteredDims, outputTy.getShape(), outputTy.getElementType());
+ auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, outputTy.getShape(), outputTy.getElementType(), filteredDims);
Value zeroTensor = rewriter
.create<linalg::FillOp>(loc, ValueRange{zero},
- ValueRange{initTensor})
+ ValueRange{emptyTensor})
.result();
if (!op.getQuantizationInfo()) {
rewriter.replaceOpWithNewOp<linalg::BatchMatmulOp>(
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank()));
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank()));
- auto initTensor = rewriter.create<linalg::InitTensorOp>(
- loc, filteredDims, outputTy.getShape(), outputTy.getElementType());
+ auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, outputTy.getShape(), outputTy.getElementType(), filteredDims);
// When quantized, the input element type is not the same as the output
Attribute resultZeroAttr = rewriter.getZeroAttr(outputETy);
Value zero = rewriter.create<arith::ConstantOp>(loc, resultZeroAttr);
Value zeroTensor = rewriter
.create<linalg::FillOp>(loc, ValueRange{zero},
- ValueRange{initTensor})
+ ValueRange{emptyTensor})
.result();
SmallVector<int64_t> permutation{1, 0};
Value transposedWeight = rewriter.create<tosa::TransposeOp>(
loc, newWeightTy, weight, permutationValue);
- auto biasInitTensor =
+ auto biasEmptyTensor =
rewriter
- .create<linalg::InitTensorOp>(loc, filteredDims,
- outputTy.getShape(), outputETy)
+ .create<tensor::EmptyOp>(loc, outputTy.getShape(), outputETy,
+ filteredDims)
->getResults();
if (!op.getQuantizationInfo()) {
Value result =
rewriter
.create<linalg::GenericOp>(
- loc, outputTy, ValueRange({bias, matmul}), biasInitTensor,
+ loc, outputTy, ValueRange({bias, matmul}), biasEmptyTensor,
indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
Value result =
rewriter
.create<linalg::GenericOp>(
- loc, outputTy, ValueRange({bias, matmul}), biasInitTensor,
+ loc, outputTy, ValueRange({bias, matmul}), biasEmptyTensor,
indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1});
// Create the linalg op that performs pooling.
- Value initTensor = rewriter.create<linalg::InitTensorOp>(
- loc, dynamicDims, resultTy.getShape(), resultTy.getElementType());
+ Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, resultTy.getShape(), resultTy.getElementType(), dynamicDims);
- Value filledInitTensor =
+ Value filledEmptyTensor =
rewriter
.create<linalg::FillOp>(loc, ValueRange{initialValue},
- ValueRange{initTensor})
+ ValueRange{emptyTensor})
.result();
Value fakeWindowDims =
- rewriter.create<linalg::InitTensorOp>(loc, kernel, resultETy);
+ rewriter.create<tensor::EmptyOp>(loc, kernel, resultETy);
rewriter.replaceOpWithNewOp<linalg::PoolingNhwcMaxOp>(
op, ArrayRef<Type>{resultTy}, ValueRange{paddedInput, fakeWindowDims},
- filledInitTensor, strideAttr, dilationAttr);
+ filledEmptyTensor, strideAttr, dilationAttr);
return success();
}
};
Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1});
// Create the linalg op that performs pooling.
- Value poolInitTensor = rewriter.create<linalg::InitTensorOp>(
- loc, dynamicDims, accTy.getShape(), accETy);
+ Value poolEmptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, accTy.getShape(), accETy, dynamicDims);
- Value filledInitTensor =
+ Value filledEmptyTensor =
rewriter
.create<linalg::FillOp>(loc, ValueRange{initialValue},
- ValueRange{poolInitTensor})
+ ValueRange{poolEmptyTensor})
.result();
Value fakeWindowDims =
- rewriter.create<linalg::InitTensorOp>(loc, kernel, accETy);
+ rewriter.create<tensor::EmptyOp>(loc, kernel, accETy);
// Sum across the pooled region.
Value poolingOp = rewriter
.create<linalg::PoolingNhwcSumOp>(
loc, ArrayRef<Type>{accTy},
ValueRange{paddedInput, fakeWindowDims},
- filledInitTensor, strideAttr, dilationAttr)
+ filledEmptyTensor, strideAttr, dilationAttr)
.getResult(0);
// Normalize the summed value by the number of elements grouped in each
auto poolingOpTy = poolingOp.getType().cast<ShapedType>();
auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank());
- Value genericInitTensor = rewriter.create<linalg::InitTensorOp>(
- loc, dynamicDims, resultTy.getShape(), resultETy);
+ Value genericEmptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, resultTy.getShape(), resultETy, dynamicDims);
auto genericOp = rewriter.create<linalg::GenericOp>(
loc, ArrayRef<Type>({resultTy}), ValueRange{poolingOp},
- ValueRange{genericInitTensor},
+ ValueRange{genericEmptyTensor},
ArrayRef<AffineMap>({affineMap, affineMap}),
getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &b, Location loc, ValueRange args) {
auto oldResultType = padOp.getResultType();
SmallVector<int64_t, 4> staticShape(oldResultType.getRank(),
ShapedType::kDynamicSize);
- auto newInitOp = rewriter.create<InitTensorOp>(
- padOp.getLoc(), reifiedShape.front(), staticShape,
- oldResultType.getElementType());
+ auto emptyTensor = rewriter.create<tensor::EmptyOp>(
+ padOp.getLoc(), staticShape, oldResultType.getElementType(),
+ reifiedShape.front());
auto newFillOp = rewriter.create<FillOp>(
- fillOp.getLoc(), ValueRange{padValue}, ValueRange{newInitOp});
+ fillOp.getLoc(), ValueRange{padValue}, ValueRange{emptyTensor});
rewriter.replaceOpWithNewOp<tensor::CastOp>(padOp, oldResultType,
newFillOp.result());
}
//===----------------------------------------------------------------------===//
-// InitTensorOp
-//===----------------------------------------------------------------------===//
-
-void InitTensorOp::build(OpBuilder &b, OperationState &result,
- ArrayRef<OpFoldResult> sizes, Type elementType,
- ArrayRef<NamedAttribute> attrs) {
- SmallVector<Value, 4> dynamicSizes;
- SmallVector<int64_t, 4> staticSizes;
- dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
- ShapedType::kDynamicSize);
- auto resultType = RankedTensorType ::get(staticSizes, elementType);
- build(b, result, resultType, dynamicSizes, b.getI64ArrayAttr(staticSizes));
- result.addAttributes(attrs);
-}
-
-LogicalResult InitTensorOp::verify() {
- RankedTensorType resultType = getType();
- SmallVector<int64_t, 4> staticSizes = llvm::to_vector<4>(llvm::map_range(
- getStaticSizes().cast<ArrayAttr>(),
- [](Attribute a) -> int64_t { return a.cast<IntegerAttr>().getInt(); }));
-
- if (failed(verifyListOfOperandsOrIntegers(
- *this, "sizes", resultType.getRank(), getStaticSizes(), getSizes(),
- ShapedType::isDynamic)))
- return failure();
-
- if (getStaticSizes().size() != static_cast<unsigned>(resultType.getRank()))
- return emitError("expected ") << resultType.getRank() << " sizes values";
-
- Type expectedType = InitTensorOp::inferResultType(
- staticSizes, resultType.getElementType(), resultType.getEncoding());
- if (resultType != expectedType) {
- return emitError("specified type ")
- << resultType << " does not match the inferred type "
- << expectedType;
- }
- return success();
-}
-
-Type InitTensorOp::inferResultType(ArrayRef<int64_t> staticSizes,
- Type elementType, Attribute encoding) {
- return RankedTensorType::get(staticSizes, elementType, encoding);
-}
-
-SmallVector<OpFoldResult> InitTensorOp::getMixedSizes() {
- SmallVector<OpFoldResult> mixedSizes;
- mixedSizes.reserve(getType().getRank());
- unsigned dynamicValIndex = 0;
- for (Attribute attr : getStaticSizes()) {
- auto intAttr = attr.cast<IntegerAttr>();
- if (!ShapedType::isDynamic(intAttr.getInt())) {
- mixedSizes.push_back(intAttr);
- continue;
- }
- mixedSizes.push_back(getSizes()[dynamicValIndex++]);
- }
- return mixedSizes;
-}
-
-namespace {
-/// Change the type of the result of a `linalg.init_tensor` by making the result
-/// type statically sized along dimension that in the original operation where
-/// defined as dynamic, but the size was defined using a `constant` op. For
-/// example
-///
-/// %c5 = arith.constant 5: index
-/// %0 = linalg.init_tensor [%arg0, %c5] : tensor<?x?xf32>
-///
-/// to
-///
-/// %0 = linalg.init_tensor [%arg0, 5] : tensor<?x5xf32>
-struct ReplaceStaticShapeDims : OpRewritePattern<InitTensorOp> {
- using OpRewritePattern<InitTensorOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(InitTensorOp op,
- PatternRewriter &rewriter) const override {
- SmallVector<Value, 4> dynamicSizes;
- SmallVector<int64_t, 4> staticSizes;
- for (unsigned i = 0, e = op.getType().getRank(); i != e; ++i) {
- // If the size is already static, nothing to do.
- if (!op.isDynamicSize(i)) {
- staticSizes.push_back(op.getStaticSize(i));
- continue;
- }
-
- // If the size is dynamic but defined using a `constant` op, get the
- // constant value to find the static size to use.
- unsigned operandNum = op.getIndexOfDynamicSize(i);
- Value sizeOperand = op.getOperand(operandNum);
- if (auto constantIndexOp =
- sizeOperand.getDefiningOp<arith::ConstantIndexOp>()) {
- staticSizes.push_back(constantIndexOp.value());
- continue;
- }
-
- // Fallback case. Keep the size dynamic.
- dynamicSizes.push_back(sizeOperand);
- staticSizes.push_back(ShapedType::kDynamicSize);
- }
- RankedTensorType newType =
- RankedTensorType::get(staticSizes, op.getType().getElementType());
- if (newType == op.getType())
- return failure();
- auto newOp =
- rewriter.create<InitTensorOp>(op.getLoc(), newType, dynamicSizes,
- rewriter.getI64ArrayAttr(staticSizes));
- rewriter.replaceOpWithNewOp<tensor::CastOp>(op, op.getType(), newOp);
- return success();
- }
-};
-} // namespace
-
-namespace {
-/// Since `init_tensor` operation creates a tensor needed only for its shape, a
-/// slice of this is also needed only for its shape. The result can be
-/// replaced by a new init_tensor operation of the same size as the extract
-/// slice op.
-struct FoldInitTensorWithExtractSliceOp
- : public OpRewritePattern<tensor::ExtractSliceOp> {
- using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
- PatternRewriter &rewriter) const override {
- if (!sliceOp.getSource().getDefiningOp<linalg::InitTensorOp>())
- return failure();
- // ExtractSliceOp may be rank-reducing; its dynamic sizes must be preserved
- // as well as its result type.
- rewriter.replaceOpWithNewOp<linalg::InitTensorOp>(
- sliceOp, sliceOp.getSizes(),
- sliceOp.getResult().getType().cast<RankedTensorType>().getShape(),
- sliceOp.getSourceType().getElementType());
- return success();
- }
-};
-
-template <typename TensorReshapeOp>
-struct FoldInitTensorWithTensorReshapeOp
- : public OpRewritePattern<TensorReshapeOp> {
- using OpRewritePattern<TensorReshapeOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(TensorReshapeOp reshapeOp,
- PatternRewriter &rewriter) const override {
- if (!reshapeOp.getSrc().template getDefiningOp<InitTensorOp>())
- return failure();
- Location loc = reshapeOp.getLoc();
- ReifiedRankedShapedTypeDims resultShapes;
- ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
- cast<ReifyRankedShapedTypeOpInterface>(reshapeOp.getOperation());
- if (failed(reifyShapedTypeInterface.reifyResultShapes(rewriter,
- resultShapes)) ||
- !llvm::hasSingleElement(resultShapes))
- return failure();
- Value initTensor = rewriter.create<InitTensorOp>(
- loc, getAsOpFoldResult(resultShapes[0]),
- reshapeOp.getResultType().getElementType());
- if (initTensor.getType() != reshapeOp.getResultType()) {
- rewriter.replaceOpWithNewOp<tensor::CastOp>(
- reshapeOp, reshapeOp.getResultType(), initTensor);
- } else {
- rewriter.replaceOp(reshapeOp, initTensor);
- }
- return success();
- }
-};
-
-struct FoldInitTensorWithDimOp : public OpRewritePattern<tensor::DimOp> {
- using OpRewritePattern<tensor::DimOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(tensor::DimOp dimOp,
- PatternRewriter &rewriter) const override {
- Optional<int64_t> maybeConstantIndex = dimOp.getConstantIndex();
- auto initTensorOp = dimOp.getSource().getDefiningOp<linalg::InitTensorOp>();
- if (!initTensorOp || !maybeConstantIndex)
- return failure();
- if (!initTensorOp.isDynamicSize(*maybeConstantIndex))
- return failure();
- rewriter.replaceOp(dimOp, initTensorOp.getDynamicSize(*maybeConstantIndex));
- return success();
- }
-};
-
-/// Canonicalize
-///
-/// ```mlir
-/// %0 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
-/// %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x?xf32>
-/// ```
-///
-/// into
-///
-/// ```mlir
-/// %0 = linalg.init_tensor [4, %d1] : tensor<4x?xf32>
-/// ```
-///
-/// This assumes the input program is correct in terms of its shape. So it
-/// is safe to assume that `%d0` is in fact 4. If that was not the case, the
-/// input program is wrong to begin with, so its undefined behavior anyway (i.e.
-/// this optimization can still triggering without violating program semantics).
-struct FoldInitTensorWithTensorCastOp
- : public OpRewritePattern<tensor::CastOp> {
- using OpRewritePattern<tensor::CastOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(tensor::CastOp castOp,
- PatternRewriter &rewriter) const override {
- if (!canFoldIntoProducerOp(castOp))
- return failure();
- auto producer = castOp.getSource().getDefiningOp<InitTensorOp>();
- if (!producer)
- return failure();
-
- auto resultType = castOp->getResult(0).getType().cast<RankedTensorType>();
- ArrayRef<int64_t> resultShape = resultType.getShape();
- SmallVector<OpFoldResult> currMixedSizes = producer.getMixedSizes();
- SmallVector<OpFoldResult> newMixedSizes;
- newMixedSizes.reserve(currMixedSizes.size());
- assert(resultShape.size() == currMixedSizes.size() &&
- "mismatch in result shape and sizes of init_tensor op");
- for (auto it : llvm::zip(resultShape, currMixedSizes)) {
- int64_t newDim = std::get<0>(it);
- OpFoldResult currDim = std::get<1>(it);
- // Case 1: The init tensor dim is static. Check that the tensor cast
- // result dim matches.
- if (auto attr = currDim.dyn_cast<Attribute>()) {
- if (ShapedType::isDynamic(newDim) ||
- newDim != attr.cast<IntegerAttr>().getInt()) {
- // Something is off, the cast result shape cannot be more dynamic than
- // the init tensor result shape (enforced by `canFoldIntoProducer`).
- // Abort for now.
- return rewriter.notifyMatchFailure(
- producer, "mismatch in static value of shape of init "
- "tensor result and cast result");
- }
- newMixedSizes.push_back(attr);
- continue;
- }
-
- // Case 2 : The tensor cast shape is static, but init tensor result shape
- // is dynamic.
- if (!ShapedType::isDynamic(newDim)) {
- newMixedSizes.push_back(rewriter.getIndexAttr(newDim));
- continue;
- }
-
- // Case 3 : The tensor cast shape is dynamic and init tensor result shape
- // is dynamic. Use the dynamic value from the init tensor op.
- newMixedSizes.push_back(currDim);
- }
-
- rewriter.replaceOpWithNewOp<InitTensorOp>(castOp, newMixedSizes,
- resultType.getElementType());
- return success();
- }
-};
-
-} // namespace
-
-void InitTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
- MLIRContext *context) {
- results.add<FoldInitTensorWithTensorCastOp, FoldInitTensorWithDimOp,
- FoldInitTensorWithExtractSliceOp,
- FoldInitTensorWithTensorReshapeOp<tensor::ExpandShapeOp>,
- FoldInitTensorWithTensorReshapeOp<tensor::CollapseShapeOp>,
- ReplaceStaticShapeDims>(context);
-}
-
-LogicalResult InitTensorOp::reifyResultShapes(
- OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
- auto shapes = llvm::to_vector<4>(llvm::map_range(
- llvm::seq<int64_t>(0, getType().getRank()), [&](int64_t dim) -> Value {
- if (isDynamicSize(dim))
- return getDynamicSize(dim);
- return builder.create<arith::ConstantIndexOp>(getLoc(),
- getStaticSize(dim));
- }));
- reifiedReturnShapes.emplace_back(std::move(shapes));
- return success();
-}
-
-//===----------------------------------------------------------------------===//
// YieldOp
//===----------------------------------------------------------------------===//
/// gets split into
///
/// ```mlir
-/// %init = linalg.init_tensor ...
+/// %init = tensor.empty ...
/// %op0:3 = linalg.generic ... ins(%arg0, %arg1, %arg2 : ...)
/// outs(%init0, %init1, %init : ...)
/// ^bb0(%b0: ... , %b1: ... , %b2: ... , %b3: ..., %b4: ..., %b5: ...):
/// After canonicalization this is expected to be
///
/// ```mlir
-/// %init = linalg.init_tensor ...
+/// %init = tensor.empty ...
/// %op0 = linalg.generic ... ins(%arg0, %arg1, : ...)
/// outs(%init : ...)
/// ^bb0(%b0: ... , %b1: ... , %b2: ...):
// Fall back path, use an `init_tensor` and identity indexing map.
AffineMap indexingMap = rewriter.getMultiDimIdentityMap(domain.size());
- Value initTensor = rewriter.create<linalg::InitTensorOp>(
- loc, domain, scalarOpResult.getType());
- newInitValues.push_back(initTensor);
- newResultTypes.push_back(initTensor.getType());
+ Value emptyTensor =
+ rewriter.create<tensor::EmptyOp>(loc, domain, scalarOpResult.getType());
+ newInitValues.push_back(emptyTensor);
+ newResultTypes.push_back(emptyTensor.getType());
peeledGenericOpIndexingMaps.push_back(indexingMap);
}
/// For the following snippet:
/// ...
/// ^bb1(%6: tensor<i32>, %9: tensor<i32>):
- /// %7 = linalg.init_tensor [] : tensor<i32>
+ /// %7 = tensor.empty() : tensor<i32>
/// %8 = linalg.generic #attrs
/// ins(%6, %6 : tensor<i32>, tensor<i32>)
/// outs(%7 : tensor<i32>) {
RankReducedInsertSliceOp<tensor::ParallelInsertSliceOp>>(
context);
linalg::FillOp::getCanonicalizationPatterns(patterns, context);
- linalg::InitTensorOp::getCanonicalizationPatterns(patterns, context);
tensor::CollapseShapeOp::getCanonicalizationPatterns(patterns, context);
+ tensor::EmptyOp::getCanonicalizationPatterns(patterns, context);
tensor::ExpandShapeOp::getCanonicalizationPatterns(patterns, context);
}
// ```mlir
// #map = affine_map<(d0, d1) -> (d0, d1)>
// %1 = tensor.expand_shape %0 [[0, 1]] : tensor<?xf32> into tensor<?x4xf32>
-// %2 = linalg.init_tensor [..] : tensor<?x4xf32>
+// %2 = tensor.empty(..) : tensor<?x4xf32>
// %3 = linalg.generic {
// indexing_maps = [#map, #map],
// iterator_types = ["parallel" ,"parallel"]}
//
// ```mlir
// #map = affine_map<(d0) -> (d0)>
-// %2 = linalg.init_tensor [..] : tensor<?xf32>
+// %2 = tensor.empty(..) : tensor<?xf32>
// %3 = linalg.generic {
// indexing_maps = [#map, #map],
// iterator_types = ["parallel"]}
// #map0 = affine_map<(d0, d1) -> (d0, d1)>
// #map1 = affine_map<(d0, d1) -> (d1, d0)>
// %1 = tensor.expand_shape %0 [[0, 1]] : tensor<?xf32> into tensor<?x4xf32>
-// %2 = linalg.init_tensor [..] : tensor<4x?xf32>
+// %2 = tensor.empty(..) : tensor<4x?xf32>
// %2 = linalg.generic {
// indexing_maps = [#map0, #map1],
// iterator_types = ["parallel" ,"parallel"]}
//===---------------------------------------------------------------------===//
namespace {
-/// Forces `outs` operands of linalg operations to use `linalg.init_tensor` if
-/// the value of the `outs` operand is not used within the op. This is only
+/// Forces `outs` operands of linalg operations to use `tensor.empty` if the
+/// value of the `outs` operand is not used within the op. This is only
/// implemented for `linalg.generic` operations for now, but should hold for all
/// linalg structured ops.
struct RemoveOutsDependency : public OpRewritePattern<GenericOp> {
if (sparse_tensor::getSparseTensorEncoding(operandVal.getType()))
continue;
- // If outs is already an `init_tensor` operation, nothing to do.
- auto definingOp = operandVal.getDefiningOp<InitTensorOp>();
+ // If outs is already an `empty` operation, nothing to do.
+ auto definingOp = operandVal.getDefiningOp<tensor::EmptyOp>();
if (definingOp)
continue;
modifiedOutput = true;
dynamicDims.push_back(rewriter.createOrFold<tensor::DimOp>(
loc, operandVal, dim.index()));
}
- Value initTensor = rewriter.create<InitTensorOp>(
- loc, dynamicDims, operandType.getShape(),
- operandType.getElementType());
- op->setOperand(opOperand->getOperandNumber(), initTensor);
+ Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+ loc, operandType.getShape(), operandType.getElementType(),
+ dynamicDims);
+ op->setOperand(opOperand->getOperandNumber(), emptyTensor);
}
}
if (!modifiedOutput) {
/// 1. `v.getType() == t`
/// 2. If an operand of `op` has type `t`, let `operand_first` be the first
/// such operand. Then`v == operand_first`.
-/// 3. Otherwise, v is a newly created `linalg::InitTensorOp` with:
+/// 3. Otherwise, v is a newly created `tensor::EmptyOp` with:
/// a. Static and dynamic dims extracted from the first operand of `op`.
/// b. Elemental type equal to the elemental type of `t`.
///
auto staticShape = llvm::to_vector<4>(rankedTensorType.getShape());
auto dynamicShape = linalg::getDynOperands(loc, firstOperand, b);
- res.push_back(b.create<linalg::InitTensorOp>(
- loc, dynamicShape, staticShape, rankedTensorType.getElementType()));
+ res.push_back(b.create<tensor::EmptyOp>(
+ loc, staticShape, rankedTensorType.getElementType(), dynamicShape));
}
return res;
}
// Create the tensor of same size as output of the pad op.
RankedTensorType padResultType = padOp.getResultType();
auto resultSizes = getAsOpFoldResult(resultShape[0]);
- auto initTensor = rewriter.create<linalg::InitTensorOp>(
+ auto emptyTensor = rewriter.create<tensor::EmptyOp>(
loc, resultSizes, padResultType.getElementType());
// Fill the tensor with the pad value.
// TODO: There is an option to fill only the boundaries. For now just
// filling the whole tensor.
auto fillTensor =
- rewriter.create<linalg::FillOp>(loc, padValue, initTensor.getResult());
+ rewriter.create<linalg::FillOp>(loc, padValue, emptyTensor.getResult());
// Construct a slice of the fill result that is to be replaced with the
// result of the generic op. The low pad values are the offsets, the size of
llvm::append_range(packedShape, transposedTensorType->getShape());
auto packedTensorType = RankedTensorType::get(
packedShape, transposedTensorType->getElementType());
- Value packedTensor = b.create<linalg::InitTensorOp>(
- loc, dynamicTensorSizes, packedTensorType.getShape(),
- packedTensorType.getElementType());
+ Value packedTensor = b.create<tensor::EmptyOp>(
+ loc, packedTensorType.getShape(), packedTensorType.getElementType(),
+ dynamicTensorSizes);
// Clone the operations involved in the backward slice, iteratively stepping
// into the loops that we encounter.
// Transpose the packed tensor back to the original storage order.
if (!transposeVector.empty()) {
- Value initTensor =
- b.create<InitTensorOp>(loc, ValueRange{}, paddedTensorType.getShape(),
- paddedTensorType.getElementType());
+ Value emptyTensor = b.create<tensor::EmptyOp>(
+ loc, paddedTensorType.getShape(), paddedTensorType.getElementType());
transposeOps.push_back(
- makeTransposeOp(b, loc, newResult, initTensor, transposeVector));
+ makeTransposeOp(b, loc, newResult, emptyTensor, transposeVector));
newResult = transposeOps.back()->getResult(0);
}
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
namespace mlir {
-#define GEN_PASS_DEF_LINALGINITTENSORTOALLOCTENSOR
+#define GEN_PASS_DEF_EMPTYTENSORTOALLOCTENSOR
#include "mlir/Dialect/Linalg/Passes.h.inc"
} // namespace mlir
using namespace mlir;
using namespace mlir::bufferization;
-using namespace mlir::linalg;
+using namespace mlir::tensor;
namespace {
-struct InitTensorLoweringPattern : public OpRewritePattern<InitTensorOp> {
- using OpRewritePattern<InitTensorOp>::OpRewritePattern;
+struct EmptyTensorLoweringPattern : public OpRewritePattern<tensor::EmptyOp> {
+ using OpRewritePattern<tensor::EmptyOp>::OpRewritePattern;
- LogicalResult matchAndRewrite(InitTensorOp op,
+ LogicalResult matchAndRewrite(tensor::EmptyOp op,
PatternRewriter &rewriter) const override {
- rewriter.replaceOpWithNewOp<bufferization::AllocTensorOp>(op, op.getType(),
- op.getSizes());
+ rewriter.replaceOpWithNewOp<bufferization::AllocTensorOp>(
+ op, op.getType(), op.getDynamicSizes());
return success();
}
};
-struct LinalgInitTensorToAllocTensor
- : public impl::LinalgInitTensorToAllocTensorBase<
- LinalgInitTensorToAllocTensor> {
- LinalgInitTensorToAllocTensor() = default;
+struct EmptyTensorToAllocTensor
+ : public impl::EmptyTensorToAllocTensorBase<EmptyTensorToAllocTensor> {
+ EmptyTensorToAllocTensor() = default;
void runOnOperation() override;
void getDependentDialects(DialectRegistry ®istry) const override {
registry
- .insert<linalg::LinalgDialect, bufferization::BufferizationDialect>();
+ .insert<tensor::TensorDialect, bufferization::BufferizationDialect>();
}
};
} // namespace
-void LinalgInitTensorToAllocTensor::runOnOperation() {
+void EmptyTensorToAllocTensor::runOnOperation() {
Operation *op = getOperation();
RewritePatternSet patterns(op->getContext());
- patterns.insert<InitTensorLoweringPattern>(op->getContext());
+ patterns.insert<EmptyTensorLoweringPattern>(op->getContext());
if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns))))
signalPassFailure();
}
-std::unique_ptr<Pass> mlir::createLinalgInitTensorToAllocTensorPass() {
- return std::make_unique<LinalgInitTensorToAllocTensor>();
+std::unique_ptr<Pass> mlir::createEmptyTensorToAllocTensorPass() {
+ return std::make_unique<EmptyTensorToAllocTensor>();
}
b.getAffineDimExpr(dim < insertSplitDimension ? dim : dim + 1));
}
}
- Value initOrAllocTensor;
+ Value emptyOrAllocTensor;
if (useAlloc) {
- initOrAllocTensor = b.create<bufferization::AllocTensorOp>(
+ emptyOrAllocTensor = b.create<bufferization::AllocTensorOp>(
loc,
RankedTensorType::get(newOutputShape,
op.getRegionOutputArgs()[0].getType()),
ValueRange{});
} else {
- initOrAllocTensor = b.create<linalg::InitTensorOp>(
+ emptyOrAllocTensor = b.create<tensor::EmptyOp>(
loc, newOutputShape, op.getRegionOutputArgs()[0].getType());
}
Value constantOp = b.create<arith::ConstantOp>(loc, identity);
Value identityTensor =
- b.create<linalg::FillOp>(op->getLoc(), constantOp, initOrAllocTensor)
+ b.create<linalg::FillOp>(op->getLoc(), constantOp, emptyOrAllocTensor)
.getResult(0);
newMaps.push_back(AffineMap::get(oldOutputMap.getNumDims() + 1, 0, outputExpr,
// Create the new op matching the original op with an extra parallel
// dimension.
GenericOp genericOp = b.create<GenericOp>(
- loc, TypeRange({initOrAllocTensor.getType()}), newInputs,
+ loc, TypeRange({emptyOrAllocTensor.getType()}), newInputs,
ValueRange({identityTensor}), newMaps, newIteratorTypes);
b.inlineRegionBefore(op->getRegion(0), genericOp.getRegion(),
genericOp.getRegion().begin());
});
b.replaceOp(op, reduction.getResults());
- return SplitReductionResult{
- initOrAllocTensor.getDefiningOp(), identityTensor.getDefiningOp<FillOp>(),
- cast<LinalgOp>(genericOp.getOperation()), reduction};
+ return SplitReductionResult{emptyOrAllocTensor.getDefiningOp(),
+ identityTensor.getDefiningOp<FillOp>(),
+ cast<LinalgOp>(genericOp.getOperation()),
+ reduction};
}
/// Rewrite f(i, j, k, ...) into f(i, j, k * ratio + kk, ...)
// TODO: generalize when multi-reduction support is available.
SmallVector<Value> newOutputs;
newOutputs.reserve(op.getNumOutputs());
- SmallVector<Operation *> initOrAllocTensorOps;
+ SmallVector<Operation *> emptyOrAllocTensorOps;
SmallVector<linalg::FillOp> fillOps;
fillOps.reserve(op.getNumOutputs());
for (auto it : llvm::zip(op.getOutputs(), neutralElements)) {
reductionDimSize / splitFactor, insertSplitDimension);
SmallVector<Value> dims =
tensor::createDynamicDimValues(b, loc, rankedTensor);
- Value initOrAllocTensor;
+ Value emptyOrAllocTensor;
if (useAlloc) {
- initOrAllocTensor =
+ emptyOrAllocTensor =
b.create<bufferization::AllocTensorOp>(loc, newT, dims);
} else {
- initOrAllocTensor = b.create<linalg::InitTensorOp>(
- loc, dims, newT.getShape(), t.getElementType());
+ emptyOrAllocTensor = b.create<tensor::EmptyOp>(loc, newT.getShape(),
+ t.getElementType(), dims);
}
Value constantOp = b.create<arith::ConstantOp>(loc, std::get<1>(it));
fillOps.push_back(
- b.create<linalg::FillOp>(op->getLoc(), constantOp, initOrAllocTensor));
+ b.create<linalg::FillOp>(op->getLoc(), constantOp, emptyOrAllocTensor));
newOutputs.push_back(fillOps.back().getResult(0));
- initOrAllocTensorOps.push_back(initOrAllocTensor.getDefiningOp());
+ emptyOrAllocTensorOps.push_back(emptyOrAllocTensor.getDefiningOp());
}
// Step 2. Reindex / expand indexing maps.
auto newInputs = llvm::to_vector<4>(op.getInputs());
// Add a single shape-only tensor to carry the dimensions without resorting to
// more complex inversions.
- newInputs.push_back(b.create<linalg::InitTensorOp>(
+ newInputs.push_back(b.create<tensor::EmptyOp>(
loc, ArrayRef<int64_t>{reductionDimSize / splitFactor, splitFactor},
b.getIntegerType(1)));
// Output tensors are already good to go.
// TODO: extend when multi-reduction support is available.
assert(fillOps.size() == results.size() && results.size() == 1);
b.replaceOp(op, results.front()->getResults());
- return SplitReductionResult{initOrAllocTensorOps.front(), fillOps.front(),
+ return SplitReductionResult{emptyOrAllocTensorOps.front(), fillOps.front(),
cast<LinalgOp>(genericOp.getOperation()),
results.front()};
}
scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx);
tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
+ tensor::EmptyOp::getCanonicalizationPatterns(patterns, ctx);
tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx);
tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx);
-
- InitTensorOp::getCanonicalizationPatterns(patterns, ctx);
tensor::PadOp::getCanonicalizationPatterns(patterns, ctx);
ctx->getLoadedDialect<LinalgDialect>()->getCanonicalizationPatterns(patterns);
return SmallVector<StringRef>(nParallelLoops, getParallelIteratorTypeName());
}
-/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp (to
+/// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp (to
/// initialize with pad_val) and GenericOp (to copy contents).
LogicalResult
PadOpTransformationPattern::matchAndRewrite(tensor::PadOp padOp,
Location loc = padOp.getLoc();
SmallVector<Value> indices(resultShapedType.getRank(),
rewriter.create<arith::ConstantIndexOp>(loc, 0));
- Value initTensor = rewriter.create<InitTensorOp>(
+ Value emptyTensor = rewriter.create<tensor::EmptyOp>(
loc, resultShapedType.getShape(), resultShapedType.getElementType());
// Initialize tensor with the pad value
Value tmpTensor = rewriter
.create<linalg::FillOp>(loc, ValueRange{padValue},
- ValueRange{initTensor})
+ ValueRange{emptyTensor})
.result();
// Copy original contents into new tensor
};
auto resultType = padOp.getResultType();
- // Compute size of InitTensorOp. Any combination of static/dynamic is
- // supported.
+ // Compute size of EmptyOp. Any combination of static/dynamic is supported.
SmallVector<Value> dynSizes;
SmallVector<int64_t> staticSizes;
for (unsigned dim = 0; dim < resultType.getRank(); ++dim) {
}
// Init tensor and fill it with padding.
- Value init = rewriter.create<InitTensorOp>(
- padOp.getLoc(), dynSizes, staticSizes, resultType.getElementType());
- Value fill = createFillOrGenerateOp(rewriter, padOp, init, dynSizes);
+ Value emptyTensor = rewriter.create<tensor::EmptyOp>(
+ padOp.getLoc(), staticSizes, resultType.getElementType(), dynSizes);
+ Value fill = createFillOrGenerateOp(rewriter, padOp, emptyTensor, dynSizes);
// Try optimize the copy of source.
if (optimizeCopyFn && optimizeCopyFn(rewriter, padOp, fill).succeeded())
return result;
}
-/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and
+/// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and
/// InsertSliceOp. For now, only constant padding values are supported.
/// If there is enough static type information, TransferReadOps and
/// TransferWriteOps may be generated instead of InsertSliceOps.
/// Returns true if tensor materializes uninitialized into the computation.
static bool isMaterializing(Value val) {
- return val.getDefiningOp<linalg::InitTensorOp>() ||
+ return val.getDefiningOp<tensor::EmptyOp>() ||
val.getDefiningOp<bufferization::AllocTensorOp>();
}
Core
LINK_LIBS PUBLIC
+ MLIRAffineDialect
MLIRArithDialect
MLIRArithUtils
MLIRCastInterfaces
//
//===----------------------------------------------------------------------===//
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
}
//===----------------------------------------------------------------------===//
+// EmptyOp
+//===----------------------------------------------------------------------===//
+
+/// Fully-static builder: the shape contains no dynamic dimensions, so no
+/// dynamic size operands are attached.
+void EmptyOp::build(OpBuilder &builder, OperationState &result,
+                    ArrayRef<int64_t> staticShape, Type elementType) {
+  // Reject shapes with kDynamicSize entries; callers with dynamic dims must
+  // use the overload that takes `dynamicSizes`.
+  assert(all_of(staticShape,
+                [](int64_t sz) { return !ShapedType::isDynamic(sz); }) &&
+         "expected only static sizes");
+  build(builder, result, staticShape, elementType, {});
+}
+
+/// Builder taking a possibly-dynamic shape: `staticShape` may contain
+/// ShapedType::kDynamicSize entries, and `dynamicSizes` supplies one index
+/// operand per such entry (count checked by the op verifier).
+void EmptyOp::build(OpBuilder &builder, OperationState &result,
+                    ArrayRef<int64_t> staticShape, Type elementType,
+                    ValueRange dynamicSizes) {
+  auto tensorType = RankedTensorType::get(staticShape, elementType);
+  build(builder, result, tensorType, dynamicSizes);
+}
+
+/// Builder taking mixed static/dynamic sizes as OpFoldResults: attribute
+/// entries become static dims, Value entries become dynamic size operands.
+void EmptyOp::build(OpBuilder &builder, OperationState &result,
+                    ArrayRef<OpFoldResult> sizes, Type elementType) {
+  SmallVector<int64_t> staticShape;
+  SmallVector<Value> dynamicSizes;
+  // Split `sizes` into the static shape (with kDynamicSize placeholders) and
+  // the list of dynamic size values, then delegate.
+  dispatchIndexOpFoldResults(sizes, dynamicSizes, staticShape,
+                             ShapedType::kDynamicSize);
+  build(builder, result, staticShape, elementType, dynamicSizes);
+}
+
+LogicalResult EmptyOp::verify() {
+  // Exactly one dynamic size operand is required per dynamic dimension of the
+  // result type; static dimensions carry their size in the type itself.
+  if (getType().getNumDynamicDims() !=
+      static_cast<int64_t>(getDynamicSizes().size()))
+    return emitOpError("incorrect number of dynamic sizes, has ")
+           << getDynamicSizes().size() << ", expected "
+           << getType().getNumDynamicDims();
+  return success();
+}
+
+/// Reify the shape of the single result: one index Value per dimension.
+/// Dynamic dims reuse the op's size operands; static dims are materialized
+/// as arith.constant index values.
+LogicalResult
+EmptyOp::reifyResultShapes(OpBuilder &builder,
+                           ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
+  reifiedReturnShapes.resize(1, SmallVector<Value>(getType().getRank()));
+  unsigned ctr = 0;
+  for (int64_t i = 0; i < getType().getRank(); ++i) {
+    if (getType().isDynamicDim(i)) {
+      // Dynamic size operands are stored densely; consume them in order.
+      reifiedReturnShapes[0][i] = getDynamicSizes()[ctr++];
+    } else {
+      // Use the static dimension *size*, not the loop index `i`.
+      reifiedReturnShapes[0][i] = builder.create<arith::ConstantIndexOp>(
+          getLoc(), getType().getDimSize(i));
+    }
+  }
+  return success();
+}
+
+/// Return the SSA value that provides the size of dynamic dimension `idx`.
+Value EmptyOp::getDynamicSize(unsigned idx) {
+  assert(getType().isDynamicDim(idx) && "expected dynamic dim");
+  // Dynamic size operands are stored densely (one per dynamic dim, in dim
+  // order), so count the dynamic dims preceding `idx` to find its operand.
+  unsigned ctr = 0;
+  for (int64_t i = 0; i < static_cast<int64_t>(idx); ++i)
+    if (getType().isDynamicDim(i))
+      ++ctr;
+  return getDynamicSizes()[ctr];
+}
+
+/// Return one OpFoldResult per result dimension: the dynamic size operand
+/// for dynamic dims, an index attribute holding the static extent otherwise.
+SmallVector<OpFoldResult> EmptyOp::getMixedSizes() {
+  SmallVector<OpFoldResult> result;
+  unsigned ctr = 0;
+  OpBuilder b(getContext());
+  for (int64_t i = 0; i < getType().getRank(); ++i) {
+    if (getType().isDynamicDim(i)) {
+      result.push_back(getDynamicSizes()[ctr++]);
+    } else {
+      result.push_back(b.getIndexAttr(getType().getShape()[i]));
+    }
+  }
+  return result;
+}
+
+namespace {
+/// Change the type of the result of a `tensor.empty` by making the result
+/// type statically sized along dimensions that in the original operation were
+/// defined as dynamic, but the size was defined using a `constant` op. For
+/// example
+///
+/// %c5 = arith.constant 5: index
+/// %0 = tensor.empty(%arg0, %c5) : tensor<?x?xf32>
+///
+/// to
+///
+/// %0 = tensor.empty(%arg0) : tensor<?x5xf32>
+struct ReplaceEmptyTensorStaticShapeDims : OpRewritePattern<EmptyOp> {
+  using OpRewritePattern<EmptyOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(EmptyOp op,
+                                PatternRewriter &rewriter) const override {
+    SmallVector<int64_t> staticShape(op.getType().getShape().begin(),
+                                     op.getType().getShape().end());
+    SmallVector<Value> dynamicSizes;
+
+    // Compute new static and dynamic sizes.
+    unsigned ctr = 0;
+    bool changedType = false;
+    for (int64_t i = 0; i < op.getType().getRank(); ++i) {
+      if (op.getType().isDynamicDim(i)) {
+        Value dynamicSize = op.getDynamicSizes()[ctr++];
+        Optional<int64_t> cst = getConstantIntValue(dynamicSize);
+        if (cst.has_value()) {
+          // The size is produced by a constant: fold it into the type and
+          // drop the operand.
+          staticShape[i] = *cst;
+          changedType = true;
+        } else {
+          // Keep the dimension dynamic.
+          dynamicSizes.push_back(dynamicSize);
+        }
+      }
+    }
+
+    // Stop here if no dynamic size was promoted to static.
+    if (!changedType)
+      return failure();
+
+    // Build the refined op and cast back to the original type so existing
+    // uses remain type-correct. The encoding is preserved.
+    auto tensorType = RankedTensorType::get(
+        staticShape, op.getType().getElementType(), op.getType().getEncoding());
+    auto newOp =
+        rewriter.create<EmptyOp>(op.getLoc(), tensorType, dynamicSizes);
+    rewriter.replaceOpWithNewOp<tensor::CastOp>(op, op.getType(), newOp);
+    return success();
+  }
+};
+
+/// `tensor.empty` does not define any tensor contents, so a slice of a
+/// `tensor.empty` can be canonicalized to a smaller `tensor.empty`.
+struct FoldEmptyTensorWithExtractSliceOp
+    : public OpRewritePattern<ExtractSliceOp> {
+  using OpRewritePattern<ExtractSliceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(ExtractSliceOp sliceOp,
+                                PatternRewriter &rewriter) const override {
+    // Only fires when the slice source is itself an empty tensor, so the
+    // slice's contents are unspecified and only its shape matters.
+    if (!sliceOp.getSource().getDefiningOp<EmptyOp>())
+      return failure();
+
+    // ExtractSliceOp may be rank-reducing; its dynamic sizes must be
+    // preserved as well as its result type.
+    auto tensorType = RankedTensorType::get(sliceOp.getType().getShape(),
+                                            sliceOp.getType().getElementType(),
+                                            sliceOp.getType().getEncoding());
+    // getSizes() holds only the *dynamic* size operands, matching the
+    // dynamic dims of the (possibly rank-reduced) result type.
+    rewriter.replaceOpWithNewOp<EmptyOp>(sliceOp, tensorType,
+                                         sliceOp.getSizes());
+    return success();
+  }
+};
+
+/// Fold expand_shape/collapse_shape of a tensor.empty into a new
+/// tensor.empty of the reshaped size: the contents are unspecified, so only
+/// the (reified) result shape must be preserved.
+template <typename ReshapeOp>
+struct FoldEmptyTensorWithReshapeOp : public OpRewritePattern<ReshapeOp> {
+  using OpRewritePattern<ReshapeOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(ReshapeOp reshapeOp,
+                                PatternRewriter &rewriter) const override {
+    if (!reshapeOp.getSrc().template getDefiningOp<EmptyOp>())
+      return failure();
+    Location loc = reshapeOp.getLoc();
+    // Reify the reshape's result shape; bail out unless it yields exactly
+    // one shape (one result).
+    ReifiedRankedShapedTypeDims resultShapes;
+    ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+        cast<ReifyRankedShapedTypeOpInterface>(reshapeOp.getOperation());
+    if (failed(reifyShapedTypeInterface.reifyResultShapes(rewriter,
+                                                          resultShapes)) ||
+        !llvm::hasSingleElement(resultShapes))
+      return failure();
+    // TODO: Do not drop tensor type encoding.
+    Value emptyTensor =
+        rewriter.create<EmptyOp>(loc, getAsOpFoldResult(resultShapes[0]),
+                                 reshapeOp.getResultType().getElementType());
+    // Insert a cast if reification produced a more/less static type than
+    // the reshape's declared result type.
+    if (emptyTensor.getType() != reshapeOp.getResultType()) {
+      rewriter.replaceOpWithNewOp<tensor::CastOp>(
+          reshapeOp, reshapeOp.getResultType(), emptyTensor);
+    } else {
+      rewriter.replaceOp(reshapeOp, emptyTensor);
+    }
+    return success();
+  }
+};
+
+/// Fold `tensor.dim` of a `tensor.empty` with a constant dimension index to
+/// the corresponding dynamic size operand of the `tensor.empty` op.
+struct FoldEmptyTensorWithDimOp : public OpRewritePattern<DimOp> {
+  using OpRewritePattern<DimOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(tensor::DimOp dimOp,
+                  PatternRewriter &rewriter) const override {
+    Optional<int64_t> maybeConstantIndex = dimOp.getConstantIndex();
+    auto emptyTensorOp = dimOp.getSource().getDefiningOp<EmptyOp>();
+    if (!emptyTensorOp || !maybeConstantIndex)
+      return failure();
+    // Only dynamic dims have a size operand to forward.
+    if (!emptyTensorOp.getType().isDynamicDim(*maybeConstantIndex))
+      return failure();
+    rewriter.replaceOp(dimOp,
+                       emptyTensorOp.getDynamicSize(*maybeConstantIndex));
+    return success();
+  }
+};
+
+/// Canonicalize
+///
+/// ```mlir
+/// %0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
+/// %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x?xf32>
+/// ```
+///
+/// into
+///
+/// ```mlir
+/// %0 = tensor.empty(%d1) : tensor<4x?xf32>
+/// ```
+///
+/// This assumes the input program is correct in terms of its shape. So it is
+/// safe to assume that `%d0` is in fact 4.
+struct FoldEmptyTensorWithCastOp : public OpRewritePattern<CastOp> {
+  using OpRewritePattern<CastOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(CastOp castOp,
+                  PatternRewriter &rewriter) const override {
+    // Only casts that can legally be folded into the producing op are
+    // considered (presumably casts that refine the type with static
+    // information -- see `canFoldIntoProducerOp`).
+    if (!canFoldIntoProducerOp(castOp))
+      return failure();
+    auto producer = castOp.getSource().getDefiningOp<EmptyOp>();
+    if (!producer)
+      return failure();
+
+    auto resultType = castOp->getResult(0).getType().cast<RankedTensorType>();
+    ArrayRef<int64_t> resultShape = resultType.getShape();
+    SmallVector<OpFoldResult> currMixedSizes = producer.getMixedSizes();
+    SmallVector<OpFoldResult> newMixedSizes;
+    newMixedSizes.reserve(currMixedSizes.size());
+    assert(resultShape.size() == currMixedSizes.size() &&
+           "mismatch in result shape and sizes of empty op");
+    // Merge, dim by dim, the cast's result shape with the empty op's sizes.
+    for (auto it : llvm::zip(resultShape, currMixedSizes)) {
+      int64_t newDim = std::get<0>(it);
+      OpFoldResult currDim = std::get<1>(it);
+      // Case 1: The empty tensor dim is static. Check that the tensor cast
+      // result dim matches.
+      if (auto attr = currDim.dyn_cast<Attribute>()) {
+        if (ShapedType::isDynamic(newDim) ||
+            newDim != attr.cast<IntegerAttr>().getInt()) {
+          // Something is off, the cast result shape cannot be more dynamic
+          // than the empty tensor result shape (enforced by
+          // `canFoldIntoProducer`). Abort for now.
+          return rewriter.notifyMatchFailure(
+              producer, "mismatch in static value of shape of empty tensor "
+                        "result and cast result");
+        }
+        newMixedSizes.push_back(attr);
+        continue;
+      }
+
+      // Case 2 : The tensor cast shape is static, but empty tensor result
+      // shape is dynamic.
+      if (!ShapedType::isDynamic(newDim)) {
+        newMixedSizes.push_back(rewriter.getIndexAttr(newDim));
+        continue;
+      }
+
+      // Case 3 : The tensor cast shape is dynamic and empty tensor result
+      // shape is dynamic. Use the dynamic value from the empty tensor op.
+      newMixedSizes.push_back(currDim);
+    }
+
+    // TODO: Do not drop tensor encoding.
+    rewriter.replaceOpWithNewOp<EmptyOp>(castOp, newMixedSizes,
+                                         resultType.getElementType());
+    return success();
+  }
+};
+
+} // namespace
+
+// Register all `tensor.empty` canonicalization patterns defined above.
+void EmptyOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                 MLIRContext *context) {
+  results.add<FoldEmptyTensorWithCastOp, FoldEmptyTensorWithDimOp,
+              FoldEmptyTensorWithExtractSliceOp,
+              FoldEmptyTensorWithReshapeOp<tensor::ExpandShapeOp>,
+              FoldEmptyTensorWithReshapeOp<tensor::CollapseShapeOp>,
+              ReplaceEmptyTensorStaticShapeDims>(context);
+}
+
+//===----------------------------------------------------------------------===//
// ExtractOp
//===----------------------------------------------------------------------===//
}
OpFoldResult ExtractOp::fold(ArrayRef<Attribute> operands) {
- // If this is a splat elements attribute, simply return the value. All of the
- // elements of a splat attribute are the same.
+ // If this is a splat elements attribute, simply return the value. All of
+ // the elements of a splat attribute are the same.
if (Attribute tensor = operands.front())
if (auto splatTensor = tensor.dyn_cast<SplatElementsAttr>())
return splatTensor.getSplatValue<Attribute>();
stride *= tensorType.getDimSize(i);
flatIndex += indices[i] * stride;
}
- // Prevent out of bounds accesses. This can happen in invalid code that will
- // never execute.
+ // Prevent out of bounds accesses. This can happen in invalid code that
+ // will never execute.
if (static_cast<int>(fromElementsOp.getElements().size()) <= flatIndex ||
flatIndex < 0)
return {};
//
// to just %element.
//
-// Consider expanding this to a template and handle all tensor cast operations.
+// Consider expanding this to a template and handle all tensor cast
+// operations.
struct ExtractElementFromIndexCast
: public OpRewritePattern<tensor::ExtractOp> {
using OpRewritePattern<tensor::ExtractOp>::OpRewritePattern;
}
LogicalResult GenerateOp::verify() {
- // Ensure that the tensor type has as many dynamic dimensions as are specified
- // by the operands.
+ // Ensure that the tensor type has as many dynamic dimensions as are
+ // specified by the operands.
RankedTensorType resultTy = getType().cast<RankedTensorType>();
if (getNumOperands() != resultTy.getNumDynamicDims())
return emitError("must have as many index operands as dynamic extents "
getReassociationIndices());
}
-/// Compute the RankedTensorType obtained by applying `reassociation` to `type`.
+/// Compute the RankedTensorType obtained by applying `reassociation` to
+/// `type`.
static RankedTensorType
computeTensorReshapeCollapsedType(RankedTensorType type,
ArrayRef<AffineMap> reassociation) {
}
};
-/// Reshape of a FromElements can be replaced with a FromElements of the result
-/// type
+/// Reshape of a FromElements can be replaced with a FromElements of the
+/// result type
template <typename TensorReshapeOp>
struct FoldReshapeWithFromElements : OpRewritePattern<TensorReshapeOp> {
using OpRewritePattern<TensorReshapeOp>::OpRewritePattern;
ShapedType sourceShapedTensorType, ArrayRef<int64_t> staticOffsets,
ArrayRef<int64_t> staticSizes, ArrayRef<int64_t> staticStrides) {
// An extract_slice op may specify only a leading subset of offset/sizes/
- // strides in which case we complete with offset=0, sizes from memref type and
- // strides=1.
+ // strides in which case we complete with offset=0, sizes from memref type
+ // and strides=1.
assert(static_cast<int64_t>(staticSizes.size()) ==
sourceShapedTensorType.getRank() &&
"unexpected staticSizes not equal to rank of source");
}
/// If the rank is reduced (i.e. the desiredResultRank is smaller than the
-/// number of sizes), drop as many size 1 as needed to produce an inferred type
-/// with the desired rank.
+/// number of sizes), drop as many size 1 as needed to produce an inferred
+/// type with the desired rank.
///
/// Note that there may be multiple ways to compute this rank-reduced type:
/// e.g. 1x6x1 can rank-reduce to either 1x6 or 6x1 2-D tensors.
build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
}
-/// Build an ExtractSliceOp with mixed static and dynamic entries packed into a
-/// Range vector.
+/// Build an ExtractSliceOp with mixed static and dynamic entries packed into
+/// a Range vector.
void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source,
ArrayRef<Range> ranges,
ArrayRef<NamedAttribute> attrs) {
build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
}
-/// Build an ExtractSliceOp with dynamic entries and custom result type. If the
-/// type passed is nullptr, it is inferred.
+/// Build an ExtractSliceOp with dynamic entries and custom result type. If
+/// the type passed is nullptr, it is inferred.
void ExtractSliceOp::build(OpBuilder &b, OperationState &result,
RankedTensorType resultType, Value source,
ValueRange offsets, ValueRange sizes,
}
}
-/// Fold arith.constant and tensor.extract_slice into arith.constant. The folded
-/// operation might introduce more constant data; Users can control their
-/// heuristics by the control function.
+/// Fold arith.constant and tensor.extract_slice into arith.constant. The
+/// folded operation might introduce more constant data; Users can control
+/// their heuristics by the control function.
class ConstantOpExtractSliceFolder final
: public OpRewritePattern<ExtractSliceOp> {
public:
for (OpFoldResult ofr : op.getMixedOffsets())
if (getConstantIntValue(ofr) != static_cast<int64_t>(0))
return failure();
- // Rank-reducing noops only need to inspect the leading dimensions: llvm::zip
- // is appropriate.
+ // Rank-reducing noops only need to inspect the leading dimensions:
+ // llvm::zip is appropriate.
auto shape = shapedType.getShape();
for (auto it : llvm::zip(op.getMixedSizes(), shape))
if (getConstantIntValue(std::get<0>(it)) != std::get<1>(it))
return success();
}
-/// If we have an ExtractSliceOp consuming an InsertSliceOp with the same slice,
-/// we can return the InsertSliceOp's source directly.
+/// If we have an ExtractSliceOp consuming an InsertSliceOp with the same
+/// slice, we can return the InsertSliceOp's source directly.
// TODO: This only checks the immediate producer; extend to go up the
// insert/extract chain if the slices are disjoint.
static Value foldExtractAfterInsertSlice(ExtractSliceOp extractOp) {
ArrayAttr staticOffsets, ArrayAttr staticSizes,
ArrayAttr staticStrides,
ShapedType *expectedType = nullptr) {
- // insert_slice is the inverse of extract_slice, use the same type inference.
+ // insert_slice is the inverse of extract_slice, use the same type
+ // inference.
RankedTensorType expected = ExtractSliceOp::inferResultType(
dstType, extractFromI64ArrayAttr(staticOffsets),
extractFromI64ArrayAttr(staticSizes),
Value toInsert = insertSliceOp.getSource();
if (sourceType != insertSliceOp.getSourceType()) {
OpBuilder::InsertionGuard g(rewriter);
- // The only difference between InsertSliceOp and ParallelInsertSliceOp is
- // the the insertion point is just before the ParallelCombiningOp in the
- // parallel case.
+      // The only difference between InsertSliceOp and ParallelInsertSliceOp
+      // is that the insertion point is just before the ParallelCombiningOp in
+      // the parallel case.
if (std::is_same<InsertOpTy, ParallelInsertSliceOp>::value)
rewriter.setInsertionPoint(insertSliceOp->getParentOp());
toInsert = rewriter.create<tensor::CastOp>(insertSliceOp.getLoc(),
}
};
-/// Fold tensor_casts with insert_slice operations. If the source or destination
-/// tensor is a tensor_cast that removes static type information, the cast is
-/// folded into the insert_slice operation. E.g.:
+/// Fold tensor_casts with insert_slice operations. If the source or
+/// destination tensor is a tensor_cast that removes static type information,
+/// the cast is folded into the insert_slice operation. E.g.:
///
/// ```mlir
/// %1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32>
/// 5) the tensor::PadOps do not have common padding dimensions,
/// 6) one tensor::ExtractSliceOp, tensor::PadOp pair has zero-padding and
/// zero-offset for every dimension.
-/// 7) the tensor::ExtractSliceOp sizes match the source tensor sizes for the
+/// 7) the tensor::ExtractSliceOp sizes match the source tensor sizes for
+/// the
/// padded source dimensions.
///
/// Example:
padOp, "cannot find zero-offset and zero-padding pair");
}
- // 7) Combine the sizes of the two tensor::ExtractSliceOps. Take the size of
- // the outer tensor::ExtractSliceOp for the dimensions padded by the outer
- // tensor::PadOp and fail if the size of the inner tensor::ExtractSliceOp
- // does not match the size of the padded dimension. Otherwise, take the size
- // of the inner tensor::ExtractSliceOp.
+ // 7) Combine the sizes of the two tensor::ExtractSliceOps. Take the size
+ // of the outer tensor::ExtractSliceOp for the dimensions padded by the
+ // outer tensor::PadOp and fail if the size of the inner
+ // tensor::ExtractSliceOp does not match the size of the padded dimension.
+ // Otherwise, take the size of the inner tensor::ExtractSliceOp.
SmallVector<OpFoldResult> newSizes = innerSliceOp.getMixedSizes();
for (auto &en : enumerate(newSizes)) {
if (!outerDims.test(en.index()))
newHighPad[en.index()] = outerPadOp.getMixedHighPad()[en.index()];
}
- // Create a new tensor::ExtractSliceOp, tensor::PadOp pair that performs the
- // two paddings in one step.
+ // Create a new tensor::ExtractSliceOp, tensor::PadOp pair that performs
+ // the two paddings in one step.
auto newSliceOp = rewriter.create<ExtractSliceOp>(
padOp.getLoc(), outerSliceOp.getSource(), newOffsets, newSizes,
innerSliceOp.getMixedStrides());
result.addAttributes(attrs);
}
-/// Build an ParallelInsertSliceOp with mixed static and dynamic entries packed
-/// into a Range vector.
+/// Build an ParallelInsertSliceOp with mixed static and dynamic entries
+/// packed into a Range vector.
void ParallelInsertSliceOp::build(OpBuilder &b, OperationState &result,
Value source, Value dest,
ArrayRef<Range> ranges,
if (!constOperand.isa_and_nonnull<IntegerAttr, FloatAttr>())
return {};
- // SplatElementsAttr::get treats single value for second arg as being a splat.
+ // SplatElementsAttr::get treats single value for second arg as being a
+ // splat.
return SplatElementsAttr::get(getType(), {constOperand});
}
auto padOp = cast<PadOp>(op);
SmallVector<OpFoldResult> mixedSizes = getAsOpFoldResult(reifiedShapes[0]);
- Value initTensor = b.create<linalg::InitTensorOp>(
+ Value emptyTensor = b.create<EmptyOp>(
op->getLoc(), mixedSizes, padOp.getResultType().getElementType());
- return {initTensor};
+ return {emptyTensor};
}
SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
ADD_TO_PARENT MLIRPythonSources.Dialects
ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
TD_FILE dialects/TensorOps.td
- SOURCES dialects/tensor.py
+ SOURCES
+ dialects/tensor.py
+ dialects/_tensor_ops_ext.py
DIALECT_NAME tensor)
declare_mlir_dialect_python_bindings(
return False
-class InitTensorOp:
- """Extends the linalg.init_tensor op."""
-
- def __init__(self,
- sizes: Union[Sequence[int], Sequence[Value]],
- element_type: Type,
- *,
- loc=None,
- ip=None):
- """Constructs an `init_tensor` with either static or dynamic sizes."""
- context = get_default_loc_context(loc)
- operands = []
- attributes = {}
- # TODO: Refactor the InitTensorOp to take an element type attribute and
- # then use normal result type inference, unifying the Python and C++ side
- # with a standard mechanism (versus stashing that in builders).
- if sizes and isinstance(sizes[0], Value):
- # Dynamic sizes.
- operands.extend(sizes)
- static_size_ints = [-1] * len(sizes)
- result_type = RankedTensorType.get(static_size_ints, element_type)
- else:
- # Static sizes.
- result_type = RankedTensorType.get(sizes, element_type)
- static_size_ints = sizes
-
- i64_type = IntegerType.get_signless(64)
- attributes["static_sizes"] = ArrayAttr.get(
- [IntegerAttr.get(i64_type, s) for s in static_size_ints],
- context=context)
- op = self.build_generic(results=[result_type],
- operands=operands,
- attributes=attributes,
- loc=loc,
- ip=ip)
- OpView.__init__(self, op)
-
-
class StructuredOpMixin:
"""All structured ops use the same mixin class."""
--- /dev/null
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+try:
+ from ..ir import *
+except ImportError as e:
+ raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Any, Optional, Sequence, Union
+from ._ods_common import get_op_result_or_value as _get_op_result_or_value, get_op_results_or_values as _get_op_results_or_values
+
+
+class EmptyOp:
+  """Extends the tensor.empty op."""
+
+  def __init__(self,
+               sizes: Sequence[Union[int, Value]],
+               element_type: Type,
+               *,
+               loc=None,
+               ip=None):
+    """Constructs an `empty` with mixed static/dynamic sizes.
+
+    Args:
+      sizes: Mixed list of sizes; `int` entries become static dims of the
+        result type, `Value` entries become dynamic dims backed by operands.
+      element_type: Element type of the resulting ranked tensor.
+      loc: Optional source location.
+      ip: Optional insertion point.
+    """
+    # TODO: Refactor the EmptyOp to take an element type attribute and
+    # then use normal result type inference, unifying the Python and C++ side
+    # with a standard mechanism (versus stashing that in builders).
+    dynamic_sizes = []
+    static_sizes = []
+    for s in sizes:
+      if isinstance(s, int):
+        static_sizes.append(s)
+      else:
+        # -1 encodes a dynamic dimension in the ranked tensor type;
+        # presumably this matches ShapedType::kDynamicSize -- TODO confirm.
+        static_sizes.append(-1)
+        dynamic_sizes.append(s)
+    result_type = RankedTensorType.get(static_sizes, element_type)
+    op = self.build_generic(
+        results=[result_type],
+        operands=dynamic_sizes,
+        attributes={},
+        loc=loc,
+        ip=ip)
+    OpView.__init__(self, op)
// CHECK-LABEL: func @generalize_pad_tensor_static_shape(
// CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
// CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[INIT:.*]] = linalg.init_tensor [1, 32, 32, 1] : tensor<1x32x32x1xf32>
+// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32>
// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32>
// CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32>
// CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32>
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
// CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index
-// CHECK: %[[INIT:.*]] = linalg.init_tensor [4, %[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]] : tensor<4x?x?x?xf32>
+// CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32>
// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32>
// CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1]], 2, %[[DIM3]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32>
// CHECK: return %[[PADDED]] : tensor<4x?x?x?xf32>
// CHECK-LABEL: @matmul
func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) {
// CHECK: [[C0:%.+]] = arith.constant 0
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) outs([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
// CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
%0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>)
// CHECK-LABEL: @matmul_quantized
func.func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) {
// CHECK: [[C0:%.+]] = arith.constant 0
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) outs([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32>
// CHECK: [[ONE:%.+]] = arith.constant 1
// CHECK: [[TWO:%.+]] = arith.constant 2
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
// CHECK: %[[C0_0:.+]] = arith.constant 0
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
// CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) outs(%[[INIT]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
// CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
%0 = "tosa.matmul"(%arg0, %arg1) : (tensor<?x5x3xf32>, tensor<?x3x6xf32>) -> (tensor<?x5x6xf32>)
// CHECK: %[[C2:.+]] = arith.constant 2
// CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]]
// CHECK: %[[C0:.+]] = arith.constant 0
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
// CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
// CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
%0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>)
// CHECK-LABEL: @matmul_dyn_independent_dim
func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) {
// CHECK: %[[C0:.+]] = arith.constant 0
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, 6]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
// CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
%0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>)
// CHECK-LABEL: @fully_connected
func.func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) {
- // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6]
+ // CHECK: [[INITT:%.+]] = tensor.empty()
// CHECK: [[ZERO:%.+]] = arith.constant 0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]]
// CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]>
// CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]])
- // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
+ // CHECK: [[INITB:%.+]] = tensor.empty()
// CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32>
// CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) {
// CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
// CHECK-LABEL: @quantized_fully_connected
func.func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) {
- // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6]
+ // CHECK: [[INITT:%.+]] = tensor.empty()
// CHECK: [[ZERO:%.+]] = arith.constant 0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]]
// CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]>
// CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]])
- // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
+ // CHECK: [[INITB:%.+]] = tensor.empty()
// CHECK: [[ONE:%.+]] = arith.constant 1
// CHECK: [[TWO:%.+]] = arith.constant 2
// CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32>
func.func @fully_connected_dyn(%arg0: tensor<?x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<?x6xf32>) {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6]
+ // CHECK: %[[INITT:.+]] = tensor.empty(%[[DIM]])
// CHECK: %[[ZERO:.+]] = arith.constant 0
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]]{{.*}}outs(%[[INITT]]
// CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]>
// CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
- // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6]
+ // CHECK: %[[INITB:.+]] = tensor.empty(%[[DIM]])
// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor<?x3xf32>, tensor<3x6xf32>) outs(%[[FILL]] : tensor<?x6xf32>) -> tensor<?x6xf32>
// CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor<?x6xf32>) outs(%[[INITB]] : tensor<?x6xf32>) {
// CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
// CHECK-LABEL: @max_pool
func.func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () {
// CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38
- // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62]
+ // CHECK-DAG: [[INIT:%.+]] = tensor.empty()
// CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}outs([[INIT]]
- // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
+ // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty()
// CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>)
%0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>)
return
// CHECK-DAG: [[PAD:%.+]] = tensor.pad %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0]
// CHECK-DAG: tensor.yield [[CONST]]
// CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32
- // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62]
+ // CHECK-DAG: [[INIT:%.+]] = tensor.empty()
// CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[INITVAL]]{{.*}}outs([[INIT]]
- // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
+ // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty()
// CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>)
%0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>)
return
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
// CHECK: %[[CONST:.+]] = arith.constant -3.40282347E+38
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 4, 32, 62]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}outs(%[[INIT]]
- // CHECK: %[[KERNEL:.+]] = linalg.init_tensor [3, 3]
+ // CHECK: %[[KERNEL:.+]] = tensor.empty()
// CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor<?x6x34x62xf32>, tensor<3x3xf32>) outs(%[[FILL]] : tensor<?x4x32x62xf32>)
%0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<?x6x34x62xf32>) -> (tensor<?x4x32x62xf32>)
return
// CHECK: [[CONST:%.+]] = arith.constant 0
// CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
// CHECK: [[CONST:%.+]] = arith.constant 0
- // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
+ // CHECK: [[POOLINIT:%.+]] = tensor.empty()
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}outs([[POOLINIT]]
- // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4]
+ // CHECK: [[KERNEL:%.+]] = tensor.empty()
// CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>)
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>)
// CHECK: [[ZERO:%.0]] = arith.constant 0
// CHECK: [[ONE:%.+]] = arith.constant 1
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
// CHECK: %[[PAD:.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
- // CHECK: %[[POOLINIT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 33, 62]
+ // CHECK: %[[POOLINIT:.+]] = tensor.empty(%[[BATCH]])
// CHECK: %[[FILL:.+]] = linalg.fill
- // CHECK: %[[KERNEL:.+]] = linalg.init_tensor [4, 4]
+ // CHECK: %[[KERNEL:.+]] = tensor.empty()
// CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%[[PAD]], %[[KERNEL]] : tensor<?x8x36x62xf32>, tensor<4x4xf32>) outs(%[[FILL]] : tensor<?x5x33x62xf32>)
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 33, 62]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[POOL]] : tensor<?x5x33x62xf32>) outs(%[[INIT]] : tensor<?x5x33x62xf32>)
%0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<?x6x34x62xf32>) -> (tensor<?x5x33x62xf32>)
return %0 : tensor<?x5x33x62xf32>
func.func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () {
// CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
// CHECK: %[[W:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
- // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 28]
+ // CHECK: %[[M_IN:.+]] = tensor.empty()
// CHECK: %[[CST:.+]] = arith.constant 0
// CHECK: %[[FILL:.+]] = linalg.fill
- // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28]
+ // CHECK: %[[B_IN:.+]] = tensor.empty()
// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>)
// CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>)
// CHECK: arith.addf
// CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
// CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
// CHECK: %[[W:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
- // CHECK: %[[M_IN:.+]] = linalg.init_tensor [%[[BATCH]], 45, 40, 28]
+ // CHECK: %[[M_IN:.+]] = tensor.empty(%[[BATCH]])
// CHECK: %[[CST:.+]] = arith.constant 0
// CHECK: %[[FILL:.+]] = linalg.fill
- // CHECK: %[[B_IN:.+]] = linalg.init_tensor [%[[BATCH]], 45, 40, 28]
+ // CHECK: %[[B_IN:.+]] = tensor.empty(%[[BATCH]])
// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<?x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<?x45x40x28xf32>)
// CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<?x45x40x28xf32>) outs(%[[B_IN]] : tensor<?x45x40x28xf32>)
// CHECK: %[[ADD:.+]] = arith.addf
// Running convolution
// CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
// CHECK: %[[WEIGHT:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
- // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, %[[H_OUT]], %[[W_OUT]], 28]
+ // CHECK: %[[M_IN:.+]] = tensor.empty(%[[H_OUT]], %[[W_OUT]])
// CHECK: %[[CST:.+]] = arith.constant 0
// CHECK: %[[FILL:.+]] = linalg.fill
- // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, %[[H_OUT]], %[[W_OUT]], 28]
+ // CHECK: %[[B_IN:.+]] = tensor.empty(%[[H_OUT]], %[[W_OUT]])
// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x?x?x28xf32>)
// CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) outs(%[[B_IN]] : tensor<1x?x?x28xf32>)
// CHECK: %[[ADD:.+]] = arith.addf
// CHECK-LABEL: @depthwise_conv
func.func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[CST0:%.+]] = arith.constant 0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
- // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33]
+ // CHECK: [[OUT:%.+]] = tensor.empty()
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
// CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
// CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
func.func @depthwise_conv_dyn(%arg0 : tensor<?x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 5, 3, 11]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
// CHECK: %[[CST0:.+]] = arith.constant 0
// CHECK: %[[FILL:.+]] = linalg.fill
- // CHECK: %[[OUT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 5, 33]
+ // CHECK: %[[OUT:.+]] = tensor.empty(%[[BATCH]])
// CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<?x7x5x3xf32>, tensor<3x1x3x11xf32>) outs(%[[FILL]] : tensor<?x5x5x3x11xf32>)
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
// CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor<?x5x5x33xf32>) outs(%[[OUT]] : tensor<?x5x5x33xf32>) {
// CHECK-LABEL: @depthwise_conv_strides
func.func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[CST0:%.+]] = arith.constant 0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
- // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33]
+ // CHECK: [[OUT:%.+]] = tensor.empty()
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
// CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
// CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
// CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
// CHECK: tensor.yield [[PADV]]
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[CST0:%.+]] = arith.constant 0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
- // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512]
+ // CHECK: [[OUT:%.+]] = tensor.empty()
// CHECK: [[C128:%.+]] = arith.constant -128
// CHECK: [[C42:%.+]] = arith.constant 42
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>)
// CHECK-LABEL: @depthwise_conv_quant_dilations
func.func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[CST0:%.+]] = arith.constant 0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
- // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512]
+ // CHECK: [[OUT:%.+]] = tensor.empty()
// CHECK: [[C128:%.+]] = arith.constant -128
// CHECK: [[C42:%.+]] = arith.constant 42
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>)
// CHECK-LABEL: @test_abs
func.func @test_abs(%arg0: tensor<f32>) -> tensor<f32> {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [] : tensor<f32>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<f32>
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%arg0 : tensor<f32>) outs([[INIT]] : tensor<f32>) {
// CHECK: ^bb0(%arg1: f32, %arg2: f32):
// CHECK: [[ELEMENT:%.+]] = math.absf %arg1
// CHECK-LABEL: @test_abs
func.func @test_abs(%arg0: tensor<2xf32>) -> tensor<2xf32> {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [2] : tensor<2xf32>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32>
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) {
// CHECK: ^bb0(%arg1: f32, %arg2: f32):
// CHECK: [[ELEMENT:%.+]] = math.absf %arg1
// CHECK-LABEL: @test_abs
func.func @test_abs(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 3] : tensor<2x3xf32>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32>
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<2x3xf32>) outs([[INIT]] : tensor<2x3xf32>) {
// CHECK: ^bb0(%arg1: f32, %arg2: f32):
// CHECK: [[ELEMENT:%.+]] = math.absf %arg1
func.func @test_abs(%arg0: tensor<?xf32>) -> tensor<?xf32> {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]]]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
// CHECK: linalg.generic
// CHECK: math.absf
%0 = "tosa.abs"(%arg0) : (tensor<?xf32>) -> tensor<?xf32>
func.func @test_abs_dyn(%arg0: tensor<2x?xf32>) -> tensor<2x?xf32> {
// CHECK: %[[C1:.+]] = arith.constant 1
// CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C1]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, %[[DIM]]]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
// CHECK: linalg.generic
// CHECK: math.absf
%0 = "tosa.abs"(%arg0) : (tensor<2x?xf32>) -> tensor<2x?xf32>
// CHECK-LABEL: @test_broadcast
func.func @test_broadcast(%arg0: tensor<1xf32>, %arg1: tensor<2xf32>) -> tensor<2xf32> {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [2] : tensor<2xf32>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32>
// CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %arg0
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %arg1 : tensor<f32>, tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) {
// CHECK: ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
// CHECK-LABEL: @test_broadcast_swapped_args
func.func @test_broadcast_swapped_args(%arg0: tensor<2xf32>, %arg1: tensor<1xf32>) -> tensor<2xf32> {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [2] : tensor<2xf32>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32>
// CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %arg1
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0, [[RESHAPE]] : tensor<2xf32>, tensor<f32>) outs([[INIT]] : tensor<2xf32>) {
// CHECK: ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
// CHECK-LABEL: @test_multibroadcast
func.func @test_multibroadcast(%arg0: tensor<1x3xf32>, %arg1: tensor<2x1xf32>) -> tensor<2x3xf32> {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 3] : tensor<2x3xf32>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32>
// CHECK: [[RESHAPE1:%.+]] = tensor.collapse_shape %arg0 {{\[}}[0, 1]]
// CHECK: [[RESHAPE2:%.+]] = tensor.collapse_shape %arg1 {{\[}}[0, 1]]
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) outs([[INIT]] : tensor<2x3xf32>) {
// CHECK-SAME: ([[ARG0:%.+]]: tensor<1x2x3xi32>)
func.func @test_transpose(%arg0: tensor<1x2x3xi32>) -> () {
%0 = arith.constant dense<[1, 2, 0]> : tensor<3xi32>
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 3, 1]
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3x1xi32>
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) outs([[OUT:%.+]] : tensor<2x3x1xi32>)
// CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32)
// CHECK: linalg.yield [[ARG1]]
%0 = arith.constant dense<[1, 3, 0, 2]> : tensor<4xi32>
// CHECK: %[[C1:.+]] = arith.constant 1
// CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C1]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 4, 1, 3]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) : tensor<?x4x1x3xi32>
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) outs([[OUT:%.+]] : tensor<?x4x1x3xi32>)
// CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32)
// CHECK: linalg.yield [[ARG1]]
// CHECK: %[[DIM0:.+]] = tensor.dim %arg0, %[[C0]]
// CHECK: %[[C1:.+]] = arith.constant 1
// CHECK: %[[DIM1:.+]] = tensor.dim %arg0, %[[C1]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM1]], %[[DIM0]]]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM0]])
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x?xf32>) outs([[OUT:%.+]] : tensor<?x?xf32>)
// CHECK: ^bb0([[ARG1:%.+]]: f32, [[ARG2:%.+]]: f32)
// CHECK: linalg.yield [[ARG1]]
// CHECK-LABEL: @reduce_float
// CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xf32>
func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<4xf32>
// CHECK: [[CST0:%.+]] = arith.constant 0.0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>)
// CHECK: tensor.expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<4xf32> into tensor<1x4xf32>
%0 = "tosa.reduce_sum"(%arg0) {axis = 0 : i64} : (tensor<5x4xf32>) -> tensor<1x4xf32>
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [5]
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5xf32>
// CHECK: [[CST0:%.+]] = arith.constant 0.0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>)
func.func @reduce_float_dyn(%arg0: tensor<?x5x4xf32>) -> () {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DYN]], 4]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<?x4xf32>
// CHECK: %[[CST0:.+]] = arith.constant 0.0
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]]
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%arg0 : tensor<?x5x4xf32>) outs(%[[FILL]] : tensor<?x4xf32>)
func.func @reduce_float_dyn_nonzero_batch(%arg0: tensor<5x?x4xf32>) -> () {
// CHECK: %[[C1:.+]] = arith.constant 1
// CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[C1]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [5, %[[DYN]]]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<5x?xf32>
// CHECK: %[[CST1:.+]] = arith.constant 1.0
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}outs(%[[INIT]]
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0 : tensor<5x?x4xf32>) outs(%[[FILL]] : tensor<5x?xf32>)
func.func @reduce_float_dyn_multiple(%arg0: tensor<?x?xf32>) -> () {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DYN]]]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
// CHECK: %[[CMIN:.+]] = arith.constant -3.40282347E+38
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}outs(%[[INIT]]
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<?x?xf32>) outs(%[[FILL]] : tensor<?xf32>)
// CHECK-LABEL: @reduce_int
// CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xi32>
func.func @reduce_int(%arg0: tensor<5x4xi32>) -> () {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[CST0:%.+]] = arith.constant 0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>)
// CHECK: tensor.expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<4xi32> into tensor<1x4xi32>
%0 = "tosa.reduce_sum"(%arg0) {axis = 0 : i64} : (tensor<5x4xi32>) -> tensor<1x4xi32>
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [5]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[CST0:%.+]] = arith.constant 0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>)
// CHECK-LABEL: @reduce_bool
// CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xi1>
func.func @reduce_bool(%arg0: tensor<5x4xi1>) -> () {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[CST0:%.+]] = arith.constant true
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) outs([[FILL]] : tensor<4xi1>)
// CHECK: [[OFFSET:%.+]] = arith.constant 0 : index
// CHECK: [[IDX0:%.+]] = arith.constant 0 : index
// CHECK: [[IDX1:%.+]] = arith.constant 1 : index
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [11, 1]
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<11x1xf32>
// CHECK: [[CST:%.+]] = arith.constant 0.0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}outs([[INIT]]
// CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]][0, 0] [5, 1] [1, 1]
// CHECK: [[OFFSET:%.+]] = arith.constant 0 : index
// CHECK: [[IDX0:%.+]] = arith.constant 0 : index
// CHECK: [[IDX1:%.+]] = arith.constant 1 : index
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [5, 2]
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5x2xf32>
// CHECK: [[CST:%.+]] = arith.constant 0.0
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}outs([[INIT]]
// CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]][0, 0] [5, 1] [1, 1]
// CHECK: %[[SIZE:.+]] = tensor.dim %arg0, %[[IDX1]]
// CHECK: %[[IDX1_2:.+]] = arith.constant 1 : index
// CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[IDX1_2]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [11, %[[DYN]]]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<11x?xf32>
// CHECK: %[[CST:.+]] = arith.constant 0.0
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}outs(%[[INIT]]
// CHECK: %[[INSERT0:.+]] = tensor.insert_slice %arg0 into %[[FILL]][0, 0] [5, %[[SIZE]]] [1, 1]
// CHECK: %[[IDX0_2:.+]] = arith.constant 0 : index
// CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[IDX0_2]]
// CHECK: %[[IDX1:.+]] = arith.constant 1 : index
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DYN]], 3]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<?x3xf32>
// CHECK: %[[CST:.+]] = arith.constant 0.0
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}outs(%[[INIT]]
// CHECK: %[[DYN1:.+]] = tensor.dim %arg0, %[[AXIS]]
func.func @rescale_i8(%arg0 : tensor<2xi8>) -> () {
// CHECK: [[C0:%.+]] = arith.constant 19689
// CHECK: [[C1:%.+]] = arith.constant 15
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [2]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
// CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
// CHECK: [[C17:%.+]] = arith.constant 17
// CHECK: [[C0:%.+]] = arith.constant 19689
// CHECK: [[C1:%.+]] = arith.constant 15
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [2]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xi8>) outs([[INIT]] : tensor<2xui8>)
// CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: ui8):
// CHECK: [[C17:%.+]] = arith.constant 17
func.func @rescale_i8_dyn_batch(%arg0 : tensor<?x2xi8>) -> () {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 2]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor<?x2xi8>
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xi8>)
%0 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor<?x2xi8>) -> (tensor<?x2xi8>)
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 2]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor<?x2xui8>
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xui8>)
%1 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor<?x2xi8>) -> (tensor<?x2xui8>)
// CHECK: %[[DIM1:.+]] = tensor.dim %arg0, %[[C1]]
// CHECK: %[[C2:.+]] = arith.constant 2
// CHECK: %[[DIM2:.+]] = tensor.dim %arg0, %[[C2]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, %[[DIM1]], %[[DIM2]], 32]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM2]])
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<1x?x?x32xi32>) outs(%[[INIT]] : tensor<1x?x?x32xi8>)
%0 = "tosa.rescale"(%arg0) {double_round = true, input_zp = 0 : i32, multiplier = [1376784203 : i32], output_zp = 0 : i32, per_channel = false, scale32 = true, shift = [38 : i32]} : (tensor<1x?x?x32xi32>) -> tensor<1x?x?x32xi8>
return
func.func @rescale_ui8(%arg0 : tensor<2xui8>) -> () {
// CHECK: [[C0:%.+]] = arith.constant 19689
// CHECK: [[C1:%.+]] = arith.constant 15
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [2]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xui8>) outs([[INIT]] : tensor<2xi8>)
// CHECK: ^bb0([[IN:%.+]]: ui8, [[UNUSED:%.+]]: i8):
// CHECK: [[C17:%.+]] = arith.constant 17
func.func @rescale_per_channel(%arg0 : tensor<3xi8>) -> (tensor<3xi8>) {
// CHECK: [[MULTIPLIERS:%.+]] = arith.constant dense<[42, 43, 0]>
// CHECK: [[SHIFTS:%.+]] = arith.constant dense<[14, 15, 0]>
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [3]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0, [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) outs([[INIT]] : tensor<3xi8>)
// CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8):
// CHECK: [[C243:%.+]] = arith.constant 243
func.func @reverse(%arg0: tensor<5x4xi32>) -> () {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[RDIM:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [5, 4]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
// CHECK-DAG: %[[I0:.+]] = linalg.index 0
// CHECK-DAG: %[[I1:.+]] = linalg.index 1
// CHECK: %[[C1:.+]] = arith.constant 1
// CHECK: %[[RDIM:.+]] = tensor.dim %arg0, %[[C1]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [5, 4]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
// CHECK-DAG: %[[I0:.+]] = linalg.index 0
// CHECK-DAG: %[[I1:.+]] = linalg.index 1
// CHECK: %[[D0_1:.+]] = tensor.dim %arg0, %[[C0_1]]
// CHECK: %[[C0_2:.+]] = arith.constant 0
// CHECK: %[[D0_2:.+]] = tensor.dim %arg0, %[[C0_2]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[D0_1]]]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[D0_1]])
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} outs(%[[INIT]] : tensor<?xi32>)
// CHECK-DAG: %[[I0:.+]] = linalg.index 0
// CHECK-DAG: %[[SUB1:.+]] = arith.constant 1
// CHECK-LABEL: @tile
func.func @tile(%arg0 : tensor<2x3xi8>) -> () {
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 2, 1, 3]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>)
// CHECK: linalg.yield %arg1 : i8
// CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1, 2], [3]]
%0 = "tosa.tile"(%arg0) {multiples = [2, 1]} : (tensor<2x3xi8>) -> (tensor<4x3xi8>)
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 2, 2, 3]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>)
// CHECK: linalg.yield %arg1 : i8
// CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]]
%1 = "tosa.tile"(%arg0) {multiples = [1, 2]} : (tensor<2x3xi8>) -> (tensor<2x6xi8>)
- // CHECK: [[INIT:%.+]] = linalg.init_tensor [5, 2, 7, 3]
+ // CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>)
// CHECK: linalg.yield %arg1 : i8
// CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]]
func.func @tile_dyn_input(%arg0 : tensor<?x3xi8>) -> () {
// CHECK: %[[CST0:.+]] = arith.constant 0
// CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST0]] : tensor<?x3xi8>
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, %[[DYN]], 1, 3]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<?x3xi8>) outs(%[[INIT]] : tensor<2x?x1x3xi8>)
// CHECK: linalg.yield %arg1 : i8
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[GENERIC]] {{\[}}[0, 1, 2, 3]]
func.func @tile_dyn_multiples(%arg0 : tensor<2x3xi8>) -> () {
// CHECK: %[[CST1:.+]] = arith.constant 1
// CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST1]] : tensor<2x3xi8>
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, 2, %[[DYN]], 3]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs(%[[INIT]] : tensor<2x2x?x3xi8>)
// CHECK: linalg.yield %arg1 : i8
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[GENERIC]] {{\[}}[0, 1, 2, 3]]
// CHECK: #[[$MAP4:.*]] = affine_map<(d0) -> ()>
func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () {
- // CHECK: [[IDX_INIT:%.+]] = linalg.init_tensor [2]
+ // CHECK: [[IDX_INIT:%.+]] = tensor.empty()
// CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32
// CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]]
- // CHECK: [[VAL_INIT:%.+]] = linalg.init_tensor [2]
+ // CHECK: [[VAL_INIT:%.+]] = tensor.empty()
// CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648
// CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]]
// CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>)
// CHECK: linalg.yield [[SELECT_IDX]], [[SELECT_VAL]]
%0 = "tosa.argmax"(%arg0) { axis = 0 : i64} : (tensor<3x2xi32>) -> (tensor<2xi32>)
- // CHECK: [[IDX_INIT:%.+]] = linalg.init_tensor [3]
+ // CHECK: [[IDX_INIT:%.+]] = tensor.empty()
// CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32
// CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]]
- // CHECK: [[VAL_INIT:%.+]] = linalg.init_tensor [3]
+ // CHECK: [[VAL_INIT:%.+]] = tensor.empty()
// CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648
// CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]]
// CHECK: linalg.generic {indexing_maps = [#map0, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
func.func @argmax_dyn_non_axis(%arg0 : tensor<3x?xi32>) -> () {
// CHECK: %[[CST1:.+]] = arith.constant 1
// CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST1]]
- // CHECK: %[[IDX_INIT:.+]] = linalg.init_tensor [%[[DYN]]]
+ // CHECK: %[[IDX_INIT:.+]] = tensor.empty(%[[DYN]])
// CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32
// CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]]
- // CHECK: %[[VAL_INIT:.+]] = linalg.init_tensor [%[[DYN]]]
+ // CHECK: %[[VAL_INIT:.+]] = tensor.empty(%[[DYN]])
// CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648
// CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]]
// CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<?xi32>, tensor<?xi32>)
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
func.func @argmax_dyn_axis(%arg0 : tensor<3x?xi32>) -> () {
- // CHECK: %[[IDX_INIT:.+]] = linalg.init_tensor [3]
+ // CHECK: %[[IDX_INIT:.+]] = tensor.empty()
// CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32
// CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]]
- // CHECK: %[[VAL_INIT:.+]] = linalg.init_tensor [3]
+ // CHECK: %[[VAL_INIT:.+]] = tensor.empty()
// CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648
// CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]]
// CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
// CHECK-LABEL: @gather_float
func.func @gather_float(%arg0: tensor<2x3x2xf32>, %arg1: tensor<2x3xi32>) -> () {
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, 3, 2]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>)
// CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: f32)
// CHECK: %[[IDX0:.+]] = linalg.index 0
func.func @gather_float_dyn(%arg0: tensor<?x3x2xf32>, %arg1: tensor<?x3xi32>) -> () {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 3, 2]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<?x3xi32>) outs(%[[INIT]] : tensor<?x3x2xf32>)
// CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: f32)
// CHECK: %[[IDX0:.+]] = linalg.index 0
// CHECK-LABEL: @gather_int
func.func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () {
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, 3, 2]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>)
// CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: i32)
// CHECK: %[[IDX0:.+]] = linalg.index 0
// CHECK-LABEL: @table8
func.func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () {
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [6]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
// CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
// CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
// CHECK-LABEL: @table16
func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () {
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [6]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>)
// CHECK: ^bb0(%arg2: i16, %arg3: i32)
// CHECK: %[[EXT_IN:.+]] = arith.extsi %arg2
func.func @table8_dyn(%arg0: tensor<?xi8>, %arg1: tensor<512xi8>) -> () {
// CHECK: %[[CST0:.+]] = arith.constant 0
// CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST0]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DYN]]]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<?xi8>) outs(%[[INIT]] : tensor<?xi8>)
// CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
// CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
// CHECK-LABEL: @table8_dyn_table
func.func @table8_dyn_table(%arg0: tensor<6xi8>, %arg1: tensor<?xi8>) -> () {
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [6]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
// CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
// CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
// CHECK-LABEL: @resize_nearest
func.func @resize_nearest(%input: tensor<1x2x2x1xf32>) -> () {
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK: %[[IDX0:.+]] = linalg.index 0
// CHECK: %[[IDX1:.+]] = linalg.index 1
// CHECK-LABEL: @resize_bilinear
func.func @resize_bilinear(%input: tensor<1x2x2x1xf32>) -> () {
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK: %[[IDX0:.+]] = linalg.index 0
// CHECK: %[[IDX1:.+]] = linalg.index 1
// CHECK-LABEL: @resize_nearest_int
func.func @resize_nearest_int(%input: tensor<1x2x2x1xi32>) -> () {
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK: %[[IDX0:.+]] = linalg.index 0
// CHECK: %[[IDX1:.+]] = linalg.index 1
// CHECK-LABEL: @resize_bilinear_int
func.func @resize_bilinear_int(%input: tensor<1x2x2x1xi8>) -> () {
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
+ // CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK: %[[IDX0:.+]] = linalg.index 0
func.func @resize_dyn(%input: tensor<?x2x2x1xi8>) -> () {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[BATCH]], 4, 4, 1]
+ // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
// CHECK: %[[GENERIC:.+]] = linalg.generic
%output = "tosa.resize"(%input) { output_size = [4, 4], stride = [128, 128], offset = [1, 2], stride_fp = [0. : f32, 0. : f32], offset_fp = [0. : f32, 0. : f32], shift = 8 : i32, mode = "BILINEAR" } : (tensor<?x2x2x1xi8>) -> (tensor<?x4x4x1xi32>)
return
%f0 = arith.constant 0.0: f32
// alloc_tensor itself does not alloc but forwards to the insert_slice.
- // InitTensorOp replaces the alloc_tensor with an inplace extract_slice.
+ // AllocTensorOpElimination replaces the alloc_tensor with an inplace
+ // extract_slice.
// CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1]
%a = bufferization.alloc_tensor(%sz) : tensor<?xf32>
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
- %init = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
+ %init = tensor.empty() : tensor<1x112x112x32xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%conv = linalg.conv_2d_nhwc_hwcf
}
// CHECK: func @conv_slice
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
+// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x112x112x32xf32>
// CHECK: %[[SLICE0:.+]] = tensor.extract_slice %arg0[0, 128, 128, 0] [1, 65, 65, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x65x65x3xf32>
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[0, 0, 0, 16] [3, 3, 3, 16] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x16xf32>
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[INIT]][0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32>
// The slice is not supposed to be bubbled up when it is rank-reducing.
func.func @rank_reducing_slice(%width : index) -> tensor<1x1x1x?xf32> {
%cst = arith.constant 1.000000e+00 : f32
- %init = linalg.init_tensor [1, %width] : tensor<1x?xf32>
+ %init = tensor.empty(%width) : tensor<1x?xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x?xf32>) -> tensor<1x?xf32>
%slice = tensor.extract_slice %fill[0, 0] [1, %width] [1, 1] : tensor<1x?xf32> to tensor<?xf32>
%expand = tensor.expand_shape %slice [[0, 1, 2, 3]] : tensor<?xf32> into tensor<1x1x1x?xf32>
}
// CHECK: func @rank_reducing_slice
-// CHECK: %[[INIT:.+]] = linalg.init_tensor
+// CHECK: %[[INIT:.+]] = tensor.empty
// CHECK: %[[FILL:.+]] = linalg.fill ins
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
// CHECK: %[[EXPAND:.+]] = tensor.expand_shape %[[SLICE]]
#map0 = affine_map<(d0) -> (d0)>
-// Same as above but with linalg.init_tensor op.
+// Same as above but with tensor.empty op.
// CHECK: #map = affine_map<(d0) -> (d0)>
-// CHECK-LABEL: func @init_tensor(
+// CHECK-LABEL: func @empty_tensor(
// CHECK-SAME: %[[IN:.*]]: tensor<?xf32>, %[[SIZE:.*]]: index)
// CHECK-DAG: %[[MEMREF:.*]] = bufferization.to_memref %[[IN]] : memref<?xf32>
// CHECK-DAG: %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) {{.*}} : memref<?xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[MEMREF]] : memref<?xf32>)
// CHECK-SAME: outs(%[[OUT_BUF]] : memref<?xf32>) {
-func.func @init_tensor(%in : tensor<?xf32>, %size: index) -> tensor<?xf32> {
- %init = linalg.init_tensor [%size] : tensor<?xf32>
+func.func @empty_tensor(%in : tensor<?xf32>, %size: index) -> tensor<?xf32> {
+ %init = tensor.empty(%size) : tensor<?xf32>
%0 = linalg.generic {
indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]
// CHECK: return %[[call]]
func.func public @main(%arg0: tensor<2x3xi1>) -> tensor<6xi64> {
%0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<2x3xi1> into tensor<6xi1>
- %1 = linalg.init_tensor [6] : tensor<6xi64>
+ %1 = tensor.empty() : tensor<6xi64>
%2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) outs(%1 : tensor<6xi64>) {
^bb0(%arg1: i1, %arg2: i64):
%4 = arith.extui %arg1 : i1 to i64
#map0 = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0) -> ()>
func.func @argmax_lowering(%arg0 : tensor<?xf32>) -> tensor<i32> {
- %init0 = linalg.init_tensor [] : tensor<f32>
- %init1 = linalg.init_tensor [] : tensor<i32>
+ %init0 = tensor.empty() : tensor<f32>
+ %init1 = tensor.empty() : tensor<i32>
%0:2 = linalg.generic {
indexing_maps = [#map0, #map1, #map1],
iterator_types = ["reduction"]}
}
// CHECK: func @argmax_lowering(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?xf32>
-// CHECK-DAG: %[[INIT0:.+]] = linalg.init_tensor [] : tensor<f32>
-// CHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [] : tensor<i32>
+// CHECK-DAG: %[[INIT0:.+]] = tensor.empty() : tensor<f32>
+// CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<i32>
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] :
// CHECK: return %[[GENERIC]]#1
// Do not remove operand needed for loop dim.
func.func @loop_dim_operand(%arg0 : tensor<?xf32>) -> tensor<i32> {
%cst = arith.constant 0 : i32
- %init = linalg.init_tensor [] : tensor<i32>
+ %init = tensor.empty() : tensor<i32>
%fill = linalg.fill ins(%cst : i32) outs(%init : tensor<i32>) -> tensor<i32>
%0 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
// Do not remove outs operand needed for loop bound computation.
func.func @loop_dim_outs_operand(%arg0 : index) -> tensor<i32> {
%cst = arith.constant 0 : i32
- %init1 = linalg.init_tensor [%arg0] : tensor<?xi32>
- %init = linalg.init_tensor [] : tensor<i32>
+ %init1 = tensor.empty(%arg0) : tensor<?xi32>
+ %init = tensor.empty() : tensor<i32>
%fill = linalg.fill ins(%cst : i32) outs(%init : tensor<i32>) -> tensor<i32>
%0:2 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
}
// CHECK: func @loop_dim_outs_operand(
// CHECK-SAME: %[[ARG0:.+]]: index
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[ARG0]]]
+// CHECK: %[[INIT:.+]] = tensor.empty(%[[ARG0]])
// CHECK: linalg.generic
// CHECK-SAME: outs(%[[INIT]]
// -----
-func.func @init_tensor_canonicalize() -> (tensor<4x5x?xf32>) {
- %c6 = arith.constant 6 : index
- %0 = linalg.init_tensor [4, 5, %c6] : tensor<4x5x?xf32>
- return %0 : tensor<4x5x?xf32>
-}
-// CHECK: func @init_tensor_canonicalize
-// CHECK: %[[T0:.+]] = linalg.init_tensor [4, 5, 6] : tensor<4x5x6xf32>
-// CHECK: %[[T1:.+]] = tensor.cast %[[T0]] : tensor<4x5x6xf32> to tensor<4x5x?xf32>
-// CHECK: return %[[T1]]
-
-// -----
-
-func.func @init_tensor_reshape_expansion(%arg0 : index) -> tensor<2x3x5x4x?x7xf32> {
- %0 = linalg.init_tensor [6, 5, %arg0] : tensor<6x5x?xf32>
- %1 = tensor.expand_shape %0 [[0, 1], [2], [3, 4, 5]]
- : tensor<6x5x?xf32> into tensor<2x3x5x4x?x7xf32>
- return %1 : tensor<2x3x5x4x?x7xf32>
-}
-// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 floordiv 28)>
-// CHECK: func @init_tensor_reshape_expansion
-// CHECK-SAME: %[[ARG0:.+]]: index
-// CHECK-NEXT: %[[D:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
-// CHECK-NEXT: %[[INIT:.+]] = linalg.init_tensor [2, 3, 5, 4, %[[D]], 7]
-// CHECK-NEXT: return %[[INIT]]
-
-// -----
-
-func.func @init_tensor_reshape_collapse(%arg0 : index) -> tensor<6x5x?xf32> {
- %0 = linalg.init_tensor [2, 3, 5, 4, %arg0, 7] : tensor<2x3x5x4x?x7xf32>
- %1 = tensor.collapse_shape %0 [[0, 1], [2], [3, 4, 5]]
- : tensor<2x3x5x4x?x7xf32> into tensor<6x5x?xf32>
- return %1 : tensor<6x5x?xf32>
-}
-// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 * 28)>
-// CHECK: func @init_tensor_reshape_collapse
-// CHECK-SAME: %[[ARG0:.+]]: index
-// CHECK-NEXT: %[[D:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
-// CHECK-NEXT: %[[INIT:.+]] = linalg.init_tensor [6, 5, %[[D]]]
-// CHECK-NEXT: return %[[INIT]]
-
-// -----
-
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @remove_no_op(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>)
-> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
%0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
%2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
- %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+ %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
%4, %5 = linalg.generic {
indexing_maps = [#map, #map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @remove_no_op_mismatched_types(%arg0 : tensor<?x?x?xf32>)
-> tensor<1x2x3xf32> {
- %out = linalg.init_tensor [1, 2, 3] : tensor<1x2x3xf32>
+ %out = tensor.empty() : tensor<1x2x3xf32>
%g = linalg.generic {
indexing_maps = [#map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
#map = affine_map<() -> ()>
func.func @cant_fold_to_tensor_cast(%arg0 : f32) -> tensor<f32> {
- %out = linalg.init_tensor [] : tensor<f32>
+ %out = tensor.empty() : tensor<f32>
%g = linalg.generic {
indexing_maps = [#map, #map],
iterator_types = []
%cst = arith.constant 1.000000e+00 : f32
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
cf.br ^bb1(%cst : f32)
^bb1(%arg1 : f32):
%cst = arith.constant 1.000000e+00 : f32
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
cf.br ^bb1(%cst : f32)
^bb1(%arg2 : f32):
// -----
-func.func @fold_init_tensor_with_slice
- (%arg0 : index, %arg1 : index) -> tensor<5x?x20xf32>
-{
- %0 = linalg.init_tensor[%arg0, 10, 40] : tensor<?x10x40xf32>
- %1 = tensor.extract_slice %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1]
- : tensor<?x10x40xf32> to tensor<5x?x20xf32>
- return %1 : tensor<5x?x20xf32>
-}
-// CHECK: func @fold_init_tensor_with_slice
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index
-// CHECK: %[[T0:.+]] = linalg.init_tensor [5, %[[ARG1]], 20]
-// CHECK: return %[[T0]]
-
-// -----
-
-func.func @fold_init_tensor_with_cast(%arg0 : index) -> tensor<1x12xf32> {
- %0 = linalg.init_tensor [%arg0, 12] : tensor<?x12xf32>
- %1 = tensor.cast %0 : tensor<?x12xf32> to tensor<1x12xf32>
- return %1 : tensor<1x12xf32>
-}
-// CHECK: func @fold_init_tensor_with_cast(%[[ARG0:.+]]: index)
-// CHECK: %[[T0:.+]] = linalg.init_tensor [1, 12] : tensor<1x12xf32>
-// CHECK: return %[[T0]] : tensor<1x12xf32>
-
-// -----
-
#accesses = [
affine_map<(i, j) -> (i, j)>
]
%c1 = arith.constant 1 : index
%c21 = arith.constant 21 : index
%c42 = arith.constant 42 : index
- %0 = linalg.init_tensor [%c21, %c42] : tensor<?x?xf32>
+ %0 = tensor.empty(%c21, %c42) : tensor<?x?xf32>
%1 = linalg.fill ins(%arg1 : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%2 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%3 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
return %4 : tensor<?x?xf32>
}
// CHECK-LABEL: func @propogate_casts
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [21, 42]
+// CHECK: %[[INIT:.+]] = tensor.empty
// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]]
// CHECK: %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]]
// CHECK: %[[RESULT:.+]] = tensor.cast %[[INSERTED]]
%c0 = arith.constant 0 : index
%cst = arith.constant 7.0 : f32
%0 = tensor.dim %arg0, %c0 : tensor<?xf32>
- %1 = linalg.init_tensor [%0] : tensor<?xf32>
- %2 = linalg.init_tensor [%0] : tensor<?xf32>
+ %1 = tensor.empty(%0) : tensor<?xf32>
+ %2 = tensor.empty(%0) : tensor<?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %1 : tensor<?xf32>, tensor<?xf32>) outs (%2:tensor<?xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
%4 = arith.addf %arg1, %cst : f32
%cst2 = arith.constant 6.0 : f32
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
- %3 = linalg.init_tensor [%1, %0] : tensor<?x?xf32>
- %4 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
+ %3 = tensor.empty(%1, %0) : tensor<?x?xf32>
+ %4 = tensor.empty(%0, %1) : tensor<?x?xf32>
%5 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%2, %3 : tensor<?x?xf32>, tensor<?x?xf32>) outs (%4:tensor<?x?xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
%6 = arith.divf %cst1, %cst2 : f32
// CHECK-LABEL: func @fold_fill_reshape()
func.func @fold_fill_reshape() -> tensor<6x4xf32> {
%zero = arith.constant 0.0 : f32
- // CHECK: %[[INIT:.+]] = linalg.init_tensor [6, 4] : tensor<6x4xf32>
- %init = linalg.init_tensor [1, 2, 3, 4] : tensor<1x2x3x4xf32>
+ // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<6x4xf32>
+ %empty = tensor.empty() : tensor<1x2x3x4xf32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<6x4xf32>) -> tensor<6x4xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
+ %fill = linalg.fill ins(%zero : f32) outs(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
%reshape = tensor.collapse_shape %fill [[0, 1, 2], [3]]
: tensor<1x2x3x4xf32> into tensor<6x4xf32>
// CHECK: return %[[FILL]] : tensor<6x4xf32>
return %1 : tensor<?x?xf32>
}
-
-// -----
-
-func.func private @some_use(%i : index, %j : index)
-
-// CHECK-LABEL: func @init_canonicalize
-// CHECK-SAME: %[[I:.*]]: index
-func.func @init_canonicalize(%i : index) {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
-
- // CHECK-NOT: init_tensor
- %0 = linalg.init_tensor [%i, 42] : tensor<?x42xf32>
-
- // CHECK-NOT: tensor.dim
- %1 = tensor.dim %0, %c0: tensor<?x42xf32>
- %2 = tensor.dim %0, %c1: tensor<?x42xf32>
-
- // CHECK: %[[c42:.*]] = arith.constant 42 : index
- // CHECK: call @some_use(%[[I]], %[[c42]])
- call @some_use(%1, %2) : (index, index) -> ()
-
- return
-}
-
-// -----
-
-// CHECK-LABEL: func @rank_reducing_init_extract
-func.func @rank_reducing_init_extract(%sz : index, %idx : index) -> tensor<2xf32> {
- // CHECK: linalg.init_tensor [2] : tensor<2xf32>
- %a = linalg.init_tensor [%sz, 2] : tensor<?x2xf32>
-
- // CHECK-NOT: extract
- %r = tensor.extract_slice %a[%idx, 0] [1, 2] [1, 1] : tensor<?x2xf32> to tensor<2xf32>
- return %r: tensor<2xf32>
-}
-
// -----
// CHECK: func @fold_self_copy
// CHECK-LABEL: func @fold_static_pad_fill
// CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [412, 276] : tensor<412x276xf32>
+// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<412x276xf32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
// CHECK: return %[[FILL]]
func.func @fold_static_pad_fill() -> tensor<412x276xf32> {
%f0 = arith.constant 0.0 : f32
- %init = linalg.init_tensor [400, 273] : tensor<400x273xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<400x273xf32>) -> tensor<400x273xf32>
+ %empty = tensor.empty() : tensor<400x273xf32>
+ %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
%pad = tensor.pad %fill low[4, 1] high[8, 2] {
^bb0(%arg1: index, %arg2: index):
tensor.yield %f0 : f32
// CHECK: %[[S1:.+]] = affine.apply #[[MAP1]]()[%[[DIM1]]]
// CHECK: %[[S2:.+]] = affine.apply #[[MAP2]]()[%[[HIGH2]]]
// CHECK: %[[S3:.+]] = affine.apply #[[MAP3]]()[%[[LOW3]], %[[HIGH3]]]
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : tensor<?x?x?x?xf32>
+// CHECK: %[[INIT:.+]] = tensor.empty(%[[S0]], %[[S1]], %[[S2]], %[[S3]]) : tensor<?x?x?x?xf32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
// CHECK: return %[[FILL]]
-func.func @fold_dynamic_pad_fill(%init: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor<?x?x?x?xf32> {
+func.func @fold_dynamic_pad_fill(%empty: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor<?x?x?x?xf32> {
%f0 = arith.constant 0.0 : f32
- %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32>
+ %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32>
%pad = tensor.pad %fill low[%low0, 8, 7, %low3] high[1, 2, %high2, %high3] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %f0 : f32
func.func @no_fold_pad_fill_value_mismatch() -> tensor<412x276xf32> {
%f0 = arith.constant 0.0 : f32
%f1 = arith.constant 1.0 : f32
- %init = linalg.init_tensor [400, 273] : tensor<400x273xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<400x273xf32>) -> tensor<400x273xf32>
+ %empty = tensor.empty() : tensor<400x273xf32>
+ %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
// CHECK: tensor.pad
%pad = tensor.pad %fill low[4, 1] high[8, 2] {
^bb0(%arg1: index, %arg2: index):
%0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
%1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
%2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
- %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+ %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
%4 = linalg.generic {
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
%0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
%1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
%2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
- %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+ %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
%4 = tensor.cast %arg1 : tensor<?x?x?xf32> to tensor<2x?x?xf32>
%5 = linalg.generic {
indexing_maps = [#map, #map, #map],
%0 = tensor.dim %arg2, %c0 : tensor<2x3x4xf32>
%1 = tensor.dim %arg2, %c1 : tensor<2x3x4xf32>
%2 = tensor.dim %arg2, %c2 : tensor<2x3x4xf32>
- %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+ %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
%4 = tensor.cast %3 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
%5 = tensor.cast %arg1 : tensor<?x?x?xf32> to tensor<2x?x?xf32>
%6 = linalg.generic {
%0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
%1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
%2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
- %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+ %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
%4 = tensor.cast %arg0 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
%5 = tensor.cast %arg1 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
%6 = linalg.generic {
// CHECK-LABEL: func @cast_dest
// CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<1x?x?xf32>,
func.func @cast_dest(%arg0: tensor<?x?x?xf32>, %arg1: tensor<1x?x?xf32>, %arg2: index, %arg3: index, %arg4: index) -> tensor<?x?x?xf32> {
- %0 = linalg.init_tensor [%arg2, %arg3, %arg4] : tensor<?x?x?xf32>
+ %0 = tensor.empty(%arg2, %arg3, %arg4) : tensor<?x?x?xf32>
%1 = tensor.cast %arg1 : tensor<1x?x?xf32> to tensor<?x?x?xf32>
%2 = linalg.generic {
indexing_maps = [#map, #map, #map],
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [8, 384, 384]
+// CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
// CHECK: %[[OFFSET1:.+]] = affine.apply #[[$MAP]]()[%[[LOW1]]]
// CHECK: %[[D0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x?xf32>
^bb0(%arg3: index, %arg4: index, %arg5: index):
tensor.yield %f0 : f32
} : tensor<?x?x?xf32> to tensor<8x128x128xf32>
- %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %empty = tensor.empty() : tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %pad into %fill[0, 1, 2] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
return %0: tensor<8x384x384xf32>
}
^bb0(%arg3: index, %arg4: index, %arg5: index):
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
- %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %empty = tensor.empty() : tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
^bb0(%arg3: index, %arg4: index, %arg5: index):
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
- %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %empty = tensor.empty() : tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 0, 129] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
// Range overlap with %1 at dim#3
^bb0(%arg3: index, %arg4: index, %arg5: index):
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
- %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %empty = tensor.empty() : tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 128, 255] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
// Range overlap with %0 at dim#3
^bb0(%arg3: index, %arg4: index, %arg5: index):
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
- %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %empty = tensor.empty() : tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
  // Overlap between %0 and %1 is fine, but overlap with %2 is not.
// CHECK-COUNT-3: tensor.insert_slice
%0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
^bb0(%arg3: index, %arg4: index, %arg5: index):
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
- %init = linalg.init_tensor [8, 384, 384] : tensor<8x384x384xf32>
+ %empty = tensor.empty() : tensor<8x384x384xf32>
// Different filling value than padding value.
- %fill = linalg.fill ins(%f1 : f32) outs(%init : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f1 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
%d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
%d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
- %init1 = linalg.init_tensor [%d1, %d2, %d0] : tensor<?x?x?xf32>
- %init2 = linalg.init_tensor [%d2, %d1, %d0] : tensor<?x?x?xf32>
+ %empty1 = tensor.empty(%d1, %d2, %d0) : tensor<?x?x?xf32>
+ %empty2 = tensor.empty(%d2, %d1, %d0) : tensor<?x?x?xf32>
%0:2 = linalg.generic {
iterator_types = ["parallel", "parallel", "parallel"],
indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d1, d2, d0)>,
affine_map<(d0, d1, d2) -> (d2, d1, d0)>]}
- ins(%arg0 : tensor<?x?x?xf32>) outs(%init1, %init2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
+ ins(%arg0 : tensor<?x?x?xf32>) outs(%empty1, %empty2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32) :
linalg.yield %b0, %b0 : f32, f32
} -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
}
// CHECK: func @fold_multi_use_generic_op_with_consumer
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>
-// CHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
+// CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<2x3x4xf32>
// CHECK-DAG: %[[CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<4x3x2xf32>
-// CHECK-DAG: %[[INIT2:.+]] = linalg.init_tensor [3, 2, 4] : tensor<3x2x4xf32>
+// CHECK-DAG: %[[INIT2:.+]] = tensor.empty() : tensor<3x2x4xf32>
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: ins(%[[CAST]] :
// CHECK-SAME: outs(%[[INIT2]], %[[INIT1]] :
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<f32>
// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<f32>
func.func @cmpf(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- // CHECK: %[[INIT:.*]] = linalg.init_tensor [] : tensor<i1>
+ // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<i1>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]]
// CHECK-SAME: outs(%[[INIT]]
// CHECK: %[[D2:.*]] = tensor.dim %[[ARG0]], %[[C2]] : tensor<4x?x?x8x2x?xf32>
// CHECK: %[[C5:.*]] = arith.constant 5 : index
// CHECK: %[[D5:.*]] = tensor.dim %[[ARG0]], %[[C5]] : tensor<4x?x?x8x2x?xf32>
- // CHECK: %[[INIT:.*]] = linalg.init_tensor [4, %[[D1]], %[[D2]], 8, 2, %[[D5]]] : tensor<4x?x?x8x2x?xi1>
+ // CHECK: %[[INIT:.*]] = tensor.empty(%[[D1]], %[[D2]], %[[D5]]) : tensor<4x?x?x8x2x?xi1>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]]
// CHECK-SAME: outs(%[[INIT]]
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %init1 = linalg.init_tensor [%d1, %d0] : tensor<?x?xf32>
- %init2 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %init1 = tensor.empty(%d1, %d0) : tensor<?x?xf32>
+ %init2 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%result:2 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1, d0)>,
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
-// CHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [%[[D1]], %[[D0]]]
-// CHECK-DAG: %[[INIT2:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+// CHECK-DAG: %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
+// CHECK-DAG: %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
// CHECK-DAG: %[[GENERIC1:.+]]:3 = linalg.generic
// CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP3]]]
// CHECK-SAME: ["parallel", "parallel"]
// CANONICALIZECHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CANONICALIZECHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CANONICALIZECHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
-// CANONICALIZECHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [%[[D1]], %[[D0]]]
-// CANONICALIZECHECK-DAG: %[[INIT2:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+// CANONICALIZECHECK-DAG: %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
+// CANONICALIZECHECK-DAG: %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
// CANONICALIZECHECK-DAG: %[[GENERIC1:.+]] = linalg.generic
// CANONICALIZECHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CANONICALIZECHECK-SAME: ["parallel", "parallel"]
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %init1 = linalg.init_tensor [%d1, %d0] : tensor<?x?xf32>
- %init2 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %init1 = tensor.empty(%d1, %d0) : tensor<?x?xf32>
+ %init2 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%result:3 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1, d0)>,
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
-// CHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [%[[D1]], %[[D0]]]
-// CHECK-DAG: %[[INIT2:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+// CHECK-DAG: %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
+// CHECK-DAG: %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
// CHECK-DAG: %[[GENERIC1:.+]]:4 = linalg.generic
// CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP0]], #[[MAP3]]]
// CHECK-SAME: ["parallel", "parallel"]
// CANONICALIZECHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CANONICALIZECHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CANONICALIZECHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
-// CANONICALIZECHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [%[[D1]], %[[D0]]]
-// CANONICALIZECHECK-DAG: %[[INIT2:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+// CANONICALIZECHECK-DAG: %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
+// CANONICALIZECHECK-DAG: %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
// CANONICALIZECHECK-DAG: %[[GENERIC1:.+]]:2 = linalg.generic
// CANONICALIZECHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]]
// CANONICALIZECHECK-SAME: ["parallel", "parallel"]
#map1 = affine_map<(d0, d1) -> (d0)>
#map2 = affine_map<(d0, d1) -> (d1, d0)>
func.func @multi_statement(%arg0 : tensor<10x20xf32>, %arg1 : tensor<10xi32>) -> tensor<20x10xf64> {
- %init = linalg.init_tensor [20, 10] : tensor<20x10xf64>
+ %init = tensor.empty() : tensor<20x10xf64>
%0 = linalg.generic {
indexing_maps = [#map0, #map1, #map2],
iterator_types = ["parallel", "parallel"]}
// CHECK: func @multi_statement(
// CHECK-SAME: %[[ARG0:.+]]: tensor<10x20xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<10xi32>)
-// CHECK-DAG: %[[INIT0:.+]] = linalg.init_tensor [20, 10] : tensor<20x10xf64>
-// CHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [10, 20] : tensor<10x20xf64>
+// CHECK-DAG: %[[INIT0:.+]] = tensor.empty() : tensor<20x10xf64>
+// CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<10x20xf64>
// CHECK: %[[GENERIC0:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel"]
// CANONICALIZECHECK: func @multi_statement(
// CANONICALIZECHECK-SAME: %[[ARG0:.+]]: tensor<10x20xf32>
// CANONICALIZECHECK-SAME: %[[ARG1:.+]]: tensor<10xi32>)
-// CANONICALIZECHECK-DAG: %[[INIT0:.+]] = linalg.init_tensor [20, 10] : tensor<20x10xf64>
-// CANONICALIZECHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [10, 20] : tensor<10x20xf64>
+// CANONICALIZECHECK-DAG: %[[INIT0:.+]] = tensor.empty() : tensor<20x10xf64>
+// CANONICALIZECHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<10x20xf64>
// CANONICALIZECHECK: %[[GENERIC0:.+]] = linalg.generic
// CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]]
// CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"]
#map = affine_map<() -> ()>
func.func @detensor_simple(%arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32> attributes {iree.module.export} {
- %0 = linalg.init_tensor [] : tensor<f32>
+ %0 = tensor.empty() : tensor<f32>
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
// CHECK: return %[[new_tensor_res]]
func.func @detensor_op_sequence(%arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32> attributes {iree.module.export} {
- %0 = linalg.init_tensor [] : tensor<f32>
+ %0 = tensor.empty() : tensor<f32>
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
linalg.yield %2 : f32
} -> tensor<f32>
- %3 = linalg.init_tensor [] : tensor<f32>
+ %3 = tensor.empty() : tensor<f32>
%4 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %1 : tensor<f32>, tensor<f32>)
outs(%3 : tensor<f32>) {
linalg.yield %5 : f32
} -> tensor<f32>
- %6 = linalg.init_tensor [] : tensor<f32>
+ %6 = tensor.empty() : tensor<f32>
%7 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%1, %4 : tensor<f32>, tensor<f32>)
outs(%6 : tensor<f32>) {
// CHECK: return %[[new_tensor_res]]
func.func @detensor_multiple_ops(%arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32> attributes {iree.module.export} {
- %0 = linalg.init_tensor [] : tensor<f32>
+ %0 = tensor.empty() : tensor<f32>
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
// CHECK: return %[[new_tensor_res]]
func.func @detensor_foreign_op(%arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32> attributes {iree.module.export} {
- %0 = linalg.init_tensor [] : tensor<f32>
+ %0 = tensor.empty() : tensor<f32>
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
%arg1_t = tensor.from_elements %arg1 : tensor<i32>
%cst = arith.constant dense<10> : tensor<i32>
- %2 = linalg.init_tensor [] : tensor<i8>
+ %2 = tensor.empty() : tensor<i8>
%3 = linalg.generic
{indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
ins(%arg0_t : tensor<i1>)
%5 = arith.trunci %4 : i8 to i1
cf.cond_br %5, ^bb1, ^bb2(%arg1_t : tensor<i32>)
^bb1:
- %6 = linalg.init_tensor [] : tensor<i32>
+ %6 = tensor.empty() : tensor<i32>
%7 = linalg.generic
{indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
ins(%arg1_t, %cst : tensor<i32>, tensor<i32>)
cf.br ^bb1(%0 : tensor<i32>)
^bb1(%2: tensor<i32>): // 2 preds: ^bb0, ^bb2
- %3 = linalg.init_tensor [] : tensor<i1>
+ %3 = tensor.empty() : tensor<i1>
%4 = linalg.generic #attrs
ins(%2, %1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
^bb2(%6: tensor<i32>): // pred: ^bb1
- %7 = linalg.init_tensor [] : tensor<i32>
+ %7 = tensor.empty() : tensor<i32>
%8 = linalg.generic #attrs
ins(%6, %6 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
cf.br ^bb1(%0 : tensor<i32>)
^bb1(%2: tensor<i32>): // 2 preds: ^bb0, ^bb2
- %3 = linalg.init_tensor [] : tensor<i1>
+ %3 = tensor.empty() : tensor<i1>
%4 = linalg.generic #attrs
ins(%2, %1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
^bb2(%6: tensor<i32>): // pred: ^bb1
- %7 = linalg.init_tensor [] : tensor<i32>
+ %7 = tensor.empty() : tensor<i32>
%8 = linalg.generic #attrs
ins(%6, %6 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
cf.br ^bb1(%0 : tensor<i32>)
^bb1(%2: tensor<i32>): // 2 preds: ^bb0, ^bb2
- %3 = linalg.init_tensor [] : tensor<i1>
+ %3 = tensor.empty() : tensor<i1>
%4 = linalg.generic #attrs
ins(%2, %1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
^bb2(%6: tensor<i32>): // pred: ^bb1
%12 = tensor.from_elements %c10 : tensor<i32>
- %7 = linalg.init_tensor [] : tensor<i32>
+ %7 = tensor.empty() : tensor<i32>
%8 = linalg.generic #attrs
ins(%6, %12 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
func.func @main(%farg0 : tensor<i32>) -> (tensor<i1>) attributes {} {
%c10 = arith.constant 10 : i32
%1 = tensor.from_elements %c10 : tensor<i32>
- %3 = linalg.init_tensor [] : tensor<i1>
+ %3 = tensor.empty() : tensor<i1>
%4 = linalg.generic #attrs
ins(%farg0, %1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
// DET-CF-LABEL: func @main(%{{.*}}: tensor<i32>)
// DET-CF-NEXT: arith.constant dense<10> : tensor<i32>
-// DET-CF-NEXT: linalg.init_tensor [] : tensor<i1>
+// DET-CF-NEXT: tensor.empty() : tensor<i1>
// DET-CF-NEXT: linalg.generic
// DET-CF-NEXT: ^{{.*}}(%{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i1)
// DET-CF-NEXT: arith.cmpi slt, %{{.*}}, %{{.*}}
cf.br ^bb1(%farg0 : tensor<i32>)
^bb1(%0: tensor<i32>): // 2 preds: ^bb0, ^bb2
- %1 = linalg.init_tensor [] : tensor<i1>
+ %1 = tensor.empty() : tensor<i1>
%2 = linalg.generic #attrs
ins(%0, %farg1 : tensor<i32>, tensor<i32>)
outs(%1 : tensor<i1>) {
cf.cond_br %3, ^bb2(%0 : tensor<i32>), ^bb3(%0 : tensor<i32>)
^bb2(%4: tensor<i32>): // pred: ^bb1
- %5 = linalg.init_tensor [] : tensor<i32>
+ %5 = tensor.empty() : tensor<i32>
%6 = linalg.generic #attrs
ins(%4, %4 : tensor<i32>, tensor<i32>)
outs(%5 : tensor<i32>) {
cf.br ^bb1(%farg0 : tensor<10xi32>)
^bb1(%0: tensor<10xi32>): // 2 preds: ^bb0, ^bb2
- %1 = linalg.init_tensor [] : tensor<i32>
+ %1 = tensor.empty() : tensor<i32>
%2 = linalg.generic #sum_reduction_attrs
ins(%0: tensor<10xi32>)
outs(%1: tensor<i32>) {
linalg.yield %b : i32
} -> tensor<i32>
- %3 = linalg.init_tensor [] : tensor<i1>
+ %3 = tensor.empty() : tensor<i1>
%4 = linalg.generic #attrs
ins(%2, %farg1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)
^bb2(%6: tensor<i32>): // pred: ^bb1
- %7 = linalg.init_tensor [10] : tensor<10xi32>
+ %7 = tensor.empty() : tensor<10xi32>
%9 = linalg.generic #broadcast_attrs
ins(%6: tensor<i32>)
outs(%7: tensor<10xi32>) {
// DET-ALL-SAME: (%{{.*}}: tensor<10xi32>, %{{.*}}: tensor<i32>)
// DET-ALL: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
// DET-ALL: ^[[bb1]](%{{.*}}: tensor<10xi32>)
-// DET-ALL: linalg.init_tensor [] : tensor<i32>
+// DET-ALL: tensor.empty() : tensor<i32>
// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
// DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
// DET-ALL: %{{.*}} = arith.addi %{{.*}}, %{{.*}}
// DET-ALL: cf.cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]](%{{.*}} : i32)
// DET-ALL: ^[[bb2]](%{{.*}}: i32)
// DET-ALL: tensor.from_elements %{{.*}} : tensor<i32>
-// DET-ALL: linalg.init_tensor [10] : tensor<10xi32>
+// DET-ALL: tensor.empty() : tensor<10xi32>
// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) outs(%{{.*}} : tensor<10xi32>) {
// DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
// DET-ALL: linalg.yield %{{.*}} : i32
cf.br ^bb1(%reshaped0 : tensor<i32>)
^bb1(%2: tensor<i32>): // 2 preds: ^bb0, ^bb2
- %3 = linalg.init_tensor [] : tensor<i1>
+ %3 = tensor.empty() : tensor<i1>
%4 = linalg.generic #attrs
ins(%2, %reshaped1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3
^bb2(%6: tensor<i32>): // pred: ^bb1
- %7 = linalg.init_tensor [] : tensor<i32>
+ %7 = tensor.empty() : tensor<i32>
%8 = linalg.generic #attrs
ins(%6, %6 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
#map1 = affine_map<(d0, d1, d2) -> (d2)>
func.func @fold_unit_dim_tensor_reshape_op(%arg0 : tensor<5xf32>) -> tensor<2x5xf32>
{
- %1 = linalg.init_tensor [1, 2, 5] : tensor<1x2x5xf32>
+ %1 = tensor.empty() : tensor<1x2x5xf32>
%2 = linalg.generic {i64, indexing_maps = [#map1, #map0],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%arg0 : tensor<5xf32>) outs(%1 : tensor<1x2x5xf32>) {
// -----
-func.func @fold_unit_dim_for_init_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> {
+func.func @fold_unit_dim_for_empty_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> {
%cst = arith.constant 0.0 : f32
- %init = linalg.init_tensor [1] : tensor<1xf32>
+ %init = tensor.empty() : tensor<1xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1xf32>) -> tensor<1xf32>
%add = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ()>
-// CHECK: func @fold_unit_dim_for_init_tensor
+// CHECK: func @fold_unit_dim_for_empty_tensor
// CHECK: %[[INPUT_RESHAPE:.+]] = tensor.collapse_shape %{{.+}} {{\[}}[0, 1]] : tensor<1x1000xf32> into tensor<1000xf32>
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [] : tensor<f32>
+// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<f32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<f32>) -> tensor<f32>
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]]
%cst = arith.constant 1.000000e+00 : f32
%c3 = arith.constant 3 : index
%0 = tensor.dim %arg0, %c3 : tensor<1x?x1x?xf32>
- %1 = linalg.init_tensor [1, %0] : tensor<1x?xf32>
+ %1 = tensor.empty(%0) : tensor<1x?xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x?xf32>) -> tensor<1x?xf32>
%3 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
// CHECK: func @unit_dim_for_reduction
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x?xf32>
// CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]]
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xf32>
+// CHECK: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor<?xf32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]]
// CHECK: %[[RESULT:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]]
func.func @unit_dim_for_both_reduction(%arg0: tensor<1x?x1x1xf32>) -> tensor<1x1xf32> {
%cst = arith.constant 1.000000e+00 : f32
%c3 = arith.constant 3 : index
- %1 = linalg.init_tensor [1, 1] : tensor<1x1xf32>
+ %1 = tensor.empty() : tensor<1x1xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32>
%3 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
// CHECK: func @unit_dim_for_both_reduction
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x1xf32>
// CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2, 3]
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [1] : tensor<1xf32>
+// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1xf32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]]
// CHECK: %[[RESULT:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]]]
%cst = arith.constant 1.000000e+00 : f32
%c2 = arith.constant 2 : index
%0 = tensor.dim %arg0, %c2 : tensor<?x1x?x1xf32>
- %1 = linalg.init_tensor [%0, 1] : tensor<?x1xf32>
+ %1 = tensor.empty(%0) : tensor<?x1xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<?x1xf32>) -> tensor<?x1xf32>
%3 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
// CHECK: func @unit_dim_for_reduction_inner
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x1x?x1xf32>
// CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1], [2, 3]]
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xf32>
+// CHECK: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor<?xf32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]]
// CHECK: %[[RESULT:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]]
#CSR = #sparse_tensor.encoding<{ dimLevelType = ["dense", "compressed"] }>
func.func @sparse_case(%arg0: tensor<8x8xf32, #CSR>, %arg1: tensor<8xf32>) -> tensor<8xf32> {
- %0 = linalg.init_tensor [8] : tensor<8xf32>
+ %0 = tensor.empty() : tensor<8xf32>
%1 = linalg.generic #matvec
ins(%arg0, %arg1: tensor<8x8xf32, #CSR>, tensor<8xf32>)
outs(%0: tensor<8xf32>) {
}
// CHECK-LABEL: func @sparse_case
-// CHECK-NEXT: linalg.init_tensor
+// CHECK-NEXT: tensor.empty
// CHECK-NEXT: linalg.generic
// -----
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = linalg.init_tensor [4, 2] : tensor<4x2xf32>
+ %0 = tensor.empty() : tensor<4x2xf32>
%res = scf.foreach_thread (%arg0, %arg1) in (%c4, %c2) shared_outs(%o = %0) -> (tensor<4x2xf32>) {
- %1 = linalg.init_tensor [1, 1] : tensor<1x1xf32>
+ %1 = tensor.empty() : tensor<1x1xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32>
scf.foreach_thread.perform_concurrently {
// CHECK: tensor.parallel_insert_slice %{{[0-9a-z]*}} into %{{[0-9a-z]*}}
%arg1 : tensor<2x3x4xi32>, %arg2 : tensor<5x6x7x8xi32>) -> tensor<2x3x4x5x6x7x8x9xi32> {
%expand = tensor.expand_shape %arg0 [[0], [1, 2], [3], [4, 5, 6], [7]]
: tensor<2x12x5x336x9xi32> into tensor<2x3x4x5x6x7x8x9xi32>
- %init = linalg.init_tensor [2, 3, 4, 5, 6, 7, 8, 9] : tensor<2x3x4x5x6x7x8x9xi32>
+ %init = tensor.empty() : tensor<2x3x4x5x6x7x8x9xi32>
%generic = linalg.generic {
indexing_maps = [#map0, #map1, #map2, #map3],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
// CHECK-SAME: %[[ARG0:.+]]: tensor<2x12x5x336x9xi32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<2x3x4xi32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<5x6x7x8xi32>
-// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [2, 3, 4, 5, 6, 7, 8, 9]
+// CHECK-DAG: %[[INIT:.+]] = tensor.empty()
// CHECK-DAG: %[[ARG1_RESHAPE:.+]] = tensor.collapse_shape %[[ARG1]] {{\[}}[0], [1, 2]{{\]}}
// CHECK-DAG: %[[ARG2_RESHAPE:.+]] = tensor.collapse_shape %[[ARG2]] {{\[}}[0], [1, 2, 3]{{\]}}
// CHECK-DAG: %[[INIT_RESHAPE:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1, 2], [3], [4, 5, 6], [7]{{\]}}
%arg1 : tensor<2x3x4xi32>, %arg2 : tensor<5x6x7x8xi32>) -> tensor<2x3x4x5x6x7x8x9xi32> {
%expand = tensor.expand_shape %arg0 [[0], [1, 2], [3], [4, 5, 6], [7]]
: tensor<2x12x5x336x9xi32> into tensor<2x3x4x5x6x7x8x9xi32>
- %init = linalg.init_tensor [2, 3, 4, 5, 6, 7, 8, 9] : tensor<2x3x4x5x6x7x8x9xi32>
+ %init = tensor.empty() : tensor<2x3x4x5x6x7x8x9xi32>
%generic = linalg.generic {
indexing_maps = [#map0, #map1, #map2, #map3],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
%arg1 : tensor<7x8x2xi32>, %arg2 : tensor<6x3x4x5xi32>) -> tensor<2x3x4x5x6x7x8x9xi32> {
%expand = tensor.expand_shape %arg0 [[0], [1, 2], [3], [4, 5, 6], [7]]
: tensor<9x56x2x60x6xi32> into tensor<9x7x8x2x3x4x5x6xi32>
- %init = linalg.init_tensor [2, 3, 4, 5, 6, 7, 8, 9] : tensor<2x3x4x5x6x7x8x9xi32>
+ %init = tensor.empty() : tensor<2x3x4x5x6x7x8x9xi32>
%generic = linalg.generic {
indexing_maps = [#map0, #map1, #map2, #map3],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
// CHECK-SAME: %[[ARG0:.+]]: tensor<9x56x2x60x6xi32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<7x8x2xi32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<6x3x4x5xi32>
-// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [2, 3, 4, 5, 6, 7, 8, 9]
+// CHECK-DAG: %[[INIT:.+]] = tensor.empty()
// CHECK-DAG: %[[ARG1_RESHAPE:.+]] = tensor.collapse_shape %[[ARG1]] {{\[}}[0, 1], [2]{{\]}}
// CHECK-DAG: %[[ARG2_RESHAPE:.+]] = tensor.collapse_shape %[[ARG2]] {{\[}}[0], [1, 2, 3]{{\]}}
// CHECK-DAG: %[[INIT_RESHAPE:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1, 2, 3], [4], [5, 6], [7]{{\]}}
%d4 = tensor.dim %arg2, %c0 : tensor<?x?x?x?xi32>
%d6 = tensor.dim %arg1, %c1 : tensor<?x?x?xi32>
%d7 = tensor.dim %arg0, %c0 : tensor<?x?x?x?x?xi32>
- %init = linalg.init_tensor [%d0, 3, %d2, 5, %d4, 7, %d6, %d7] : tensor<?x3x?x5x?x7x?x?xi32>
+ %init = tensor.empty(%d0, %d2, %d4, %d6, %d7) : tensor<?x3x?x5x?x7x?x?xi32>
%generic = linalg.generic {
indexing_maps = [#map0, #map1, #map2, #map3],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1)>
func.func @no_fuse_unpreserved_folding(%arg0 : tensor<2x12x5xf32>, %arg1 : tensor<2x3xf32>) -> tensor<2x3x4x5xf32> {
%0 = tensor.expand_shape %arg0 [[0], [1, 2], [3]] : tensor<2x12x5xf32> into tensor<2x3x4x5xf32>
- %init = linalg.init_tensor [2, 3, 4, 5] : tensor<2x3x4x5xf32>
+ %init = tensor.empty(): tensor<2x3x4x5xf32>
%1 = linalg.generic {
indexing_maps = [#map0, #map1, #map0],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>
func.func @no_fuse_unpreserved_folding_transpose(%arg0 : tensor<2x12x5xf32>, %arg1 : tensor<2xf32>) -> tensor<2x4x3x5xf32> {
%0 = tensor.expand_shape %arg0 [[0], [1, 2], [3]] : tensor<2x12x5xf32> into tensor<2x3x4x5xf32>
- %init = linalg.init_tensor [2, 4, 3, 5] : tensor<2x4x3x5xf32>
+ %init = tensor.empty() : tensor<2x4x3x5xf32>
%1 = linalg.generic {
indexing_maps = [#map0, #map1, #map2],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d3)>
func.func @no_fuse_mismatched_iterator_types(%arg0 : tensor<2x12x5xf32>, %arg1 : tensor<2x3xf32>) -> tensor<2x5xf32> {
%0 = tensor.expand_shape %arg0 [[0], [1, 2], [3]] : tensor<2x12x5xf32> into tensor<2x3x4x5xf32>
- %init = linalg.init_tensor [2, 5] : tensor<2x5xf32>
+ %init = tensor.empty() : tensor<2x5xf32>
%1 = linalg.generic {
indexing_maps = [#map0, #map1, #map2],
iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
func.func @control_fusion(%arg0 : tensor<6xf32>, %arg1 : tensor<20xf32>) -> tensor<2x3x4x5xf32> {
%0 = tensor.expand_shape %arg0 [[0, 1]] : tensor<6xf32> into tensor<2x3xf32>
%1 = tensor.expand_shape %arg1 [[0, 1]] : tensor<20xf32> into tensor<4x5xf32>
- %init = linalg.init_tensor [2, 3, 4, 5] : tensor<2x3x4x5xf32>
+ %init = tensor.empty() : tensor<2x3x4x5xf32>
%2 = linalg.generic {
indexing_maps = [#map0, #map1, #map2],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
// CONTROL-SAME: %[[ARG0:.+]]: tensor<6xf32>
// CONTROL-SAME: %[[ARG1:.+]]: tensor<20xf32>
// CONTROL: %[[EXPAND:.+]] = tensor.expand_shape %[[ARG0]]
-// CONTROL: %[[INIT:.+]] = linalg.init_tensor [2, 3, 4, 5]
+// CONTROL: %[[INIT:.+]] = tensor.empty()
// CONTROL: %[[INIT_RESHAPE:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]{{\]}}
// CONTROL: %[[GENERIC:.+]] = linalg.generic
// CONTROL-SAME: ins(%[[EXPAND]], %[[ARG1]] :
#map = affine_map<(d0) -> (d0)>
func.func @zero_D_test(%arg0: tensor<f32>) -> tensor<1xf32> {
%0 = tensor.expand_shape %arg0 [] : tensor<f32> into tensor<1xf32>
- %init = linalg.init_tensor [1] : tensor<1xf32>
+ %init = tensor.empty() : tensor<1xf32>
%1 = linalg.generic {
indexing_maps = [#map, #map],
iterator_types = ["parallel"]}
%0 = tensor.expand_shape %arg0 [[0, 1], [2, 3]] : tensor<?x?xi32> into tensor<?x4x?x8xi32>
%d0 = tensor.dim %0, %c0 : tensor<?x4x?x8xi32>
%d1 = tensor.dim %0, %c2 : tensor<?x4x?x8xi32>
- %init = linalg.init_tensor [%d1, 8, %d0, 4] : tensor<?x8x?x4xi32>
+ %init = tensor.empty(%d1, %d0) : tensor<?x8x?x4xi32>
%1 = linalg.generic {
indexing_maps = [#map0, #map1],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xi32>)
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
-// CHECK: %[[INIT:.+]] = linalg.init_tensor
+// CHECK: %[[INIT:.+]] = tensor.empty
// CHECK: %[[COLLAPSE_INIT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0, 1], [2, 3]{{\]}}
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]]
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%0 = tensor.expand_shape %arg0 [[0, 1], [2, 3]] : tensor<?x?xi32> into tensor<?x4x?x8xi32>
- %init = linalg.init_tensor [] : tensor<i32>
+ %init = tensor.empty() : tensor<i32>
%1 = linalg.generic {
indexing_maps = [#map0, #map1],
iterator_types = ["reduction", "reduction", "reduction", "reduction"]}
%c1 = arith.constant 1 : index
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
%c1 = arith.constant 1 : index
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, f32)
outs(%2 : tensor<?x?xf32>) {
%c1 = arith.constant 1 : index
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
%c1 = arith.constant 1 : index
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = tensor.dim %arg0, %c0 : tensor<?xf32>
- %1 = linalg.init_tensor [%0] : tensor<?xf32>
+ %1 = tensor.empty(%0) : tensor<?xf32>
%2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]}
ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
outs(%1 : tensor<?xf32>) {
// CHECK: linalg.generic {
// CHECK-SAME: indexing_maps = {{\[}}[[$MAP1]], [[$MAP1]], [[$MAP0]], [[$MAP0]]
%3 = tensor.dim %arg2, %c1 : tensor<?x?xf32>
- %4 = linalg.init_tensor [%0, %3] : tensor<?x?xf32>
+ %4 = tensor.empty(%0, %3) : tensor<?x?xf32>
%5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%2, %arg2 : tensor<?xf32>, tensor<?x?xf32>)
outs(%4 : tensor<?x?xf32>){
// CHECK-LABEL: @add_mul_scalar_fusion
func.func @add_mul_scalar_fusion(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32>
{
- %0 = linalg.init_tensor [] : tensor<f32>
+ %0 = tensor.empty() : tensor<f32>
%1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []}
ins(%arg0, %arg1 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
%cst = arith.constant dense<42.0> : tensor<5xf32>
%0 = tensor.dim %arg0, %c1 : tensor<5x?x?xf32>
%1 = tensor.dim %arg0, %c2 : tensor<5x?x?xf32>
- %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<5x?x?xf32>
%3 = linalg.generic {
indexing_maps = [#map0, #map1, #map1],
iterator_types = ["parallel", "parallel", "parallel"]}
%cst = arith.constant dense<42.0> : tensor<f32>
%0 = tensor.dim %arg0, %c1 : tensor<5x?x?xf32>
%1 = tensor.dim %arg0, %c2 : tensor<5x?x?xf32>
- %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<5x?x?xf32>
%3 = linalg.generic {
indexing_maps = [#map0, #map1, #map1],
iterator_types = ["parallel", "parallel", "parallel"]}
%c1 = arith.constant 1 : index
%0 = tensor.dim %arg0, %c0 : tensor<?x?xi32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xi32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xi32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xi32>
%3 = linalg.generic {
indexing_maps = [#map0, #map0, #map0],
iterator_types = ["parallel", "parallel"] }
%c1 = arith.constant 1 : index
%0 = tensor.dim %arg0, %c0 : tensor<?x?xi32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xi32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xi32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xi32>
%3 = linalg.generic {
indexing_maps = [#map0, #map0],
iterator_types = ["parallel", "parallel"] }
%c1 = arith.constant 1 : index
%0 = tensor.dim %arg0, %c0 : tensor<?x?xi32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xi32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xi32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xi32>
%3 = linalg.generic {
indexing_maps = [#map0, #map0],
iterator_types = ["parallel", "parallel"] }
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?xi32>
- %0 = linalg.init_tensor [%d0] : tensor<?xi32>
+ %0 = tensor.empty(%d0) : tensor<?xi32>
%1 = linalg.generic
{indexing_maps = [#map1, #map1],
iterator_types = ["parallel"]}
} -> tensor<?xi32>
%2 = tensor.dim %arg1, %c0 : tensor<?x?xi32>
%3 = tensor.dim %arg1, %c1 : tensor<?x?xi32>
- %4 = linalg.init_tensor [%2, %3] : tensor<?x?xi32>
+ %4 = tensor.empty(%2, %3) : tensor<?x?xi32>
%5 = linalg.generic
{indexing_maps = [#map2, #map3, #map2],
iterator_types = ["parallel", "parallel"]}
{
%c0 = arith.constant 0 : index
%cst = arith.constant dense<1.000000e+00> : tensor<10xf32>
- %0 = linalg.init_tensor [] : tensor<f32>
+ %0 = tensor.empty() : tensor<f32>
%1 = linalg.generic
{indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>],
iterator_types = []}
%4 = tensor.extract %arg0[%3, %c0, %c0] : tensor<5x1x1xf32>
linalg.yield %4 : f32
} -> tensor<f32>
- %2 = linalg.init_tensor [10] : tensor<10xf32>
+ %2 = tensor.empty() : tensor<10xf32>
%3 = linalg.generic
{indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>],
func.func @constant_fusion(%arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
%cst = arith.constant dense<1.0> : tensor<4xf32>
- %1 = linalg.init_tensor [4] : tensor<4xf32>
+ %1 = tensor.empty() : tensor<4xf32>
%2 = linalg.generic
{indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>],
// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0) -> (d0)>
// CHECK: func @constant_fusion(%[[ARG0:.+]]: tensor<4xf32>)
// CHECK-DAG: %[[CST:.+]] = arith.constant 1.000000e+00 : f32
-// CHECK-DAG: %[[T0:.+]] = linalg.init_tensor [4] : tensor<4xf32>
+// CHECK-DAG: %[[T0:.+]] = tensor.empty() : tensor<4xf32>
// CHECK: %[[T1:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]]
// CHECK-SAME: ins(%[[ARG0]] : tensor<4xf32>)
func.func @consumer_with_reduction(%arg0: tensor<1x10xf32>,
%arg1: tensor<1x10xf32>,
%arg2: tensor<1xf32>) -> tensor<1xf32> {
- %init = linalg.init_tensor [1, 10] : tensor<1x10xf32>
+ %init = tensor.empty() : tensor<1x10xf32>
%0 = linalg.generic
{indexing_maps = [#map0, #map0, #map0],
iterator_types = ["parallel", "parallel"]}
%shape = shape.shape_of %0 : tensor<?x1xf32> -> tensor<?xindex>
%extend = shape.to_extent_tensor %shape : tensor<?xindex> -> tensor<2xindex>
%extracted = tensor.extract %extend[%c0] : tensor<2xindex>
- %init0 = linalg.init_tensor [%extracted, 1] : tensor<?x1xf32>
+ %init0 = tensor.empty(%extracted) : tensor<?x1xf32>
%1 = linalg.generic {indexing_maps = [
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]
linalg.yield %cp5 : f32
} -> tensor<?x1xf32>
%d0 = tensor.dim %0, %c0 : tensor<?x1xf32>
- %init1 = linalg.init_tensor [%d0, 1] : tensor<?x1xf32>
+ %init1 = tensor.empty(%d0) : tensor<?x1xf32>
%2 = linalg.generic {indexing_maps = [
affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, d1)>,
// CHECK-SAME: ins(%[[CONST]] : tensor<3x2xf32>)
// CHECK: return %[[RESULT]]
%three = arith.constant dense<3.0> : tensor<3x2xf32>
- %init = linalg.init_tensor [3] : tensor<3xf32>
+ %init = tensor.empty() : tensor<3xf32>
%result = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
-// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+// CHECK-DAG: %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]])
// CHECK: %[[GENERIC1:.+]] = linalg.generic
// CHECK-SAME: outs(%[[INIT]] : tensor<?x?xf32>)
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[GENERIC1]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[GENERIC1]], %[[C1]]
-// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+// CHECK-DAG: %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]])
// CHECK: %[[RESULT:.+]] = linalg.generic
// CHECK-SAME: outs(%[[INIT]] : tensor<?x?xf32>)
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %0 = linalg.init_tensor[%d0, %d1] : tensor<?x?xf32>
- %1 = linalg.init_tensor[%d0, %d1] : tensor<?x?xi32>
+ %0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
+ %1 = tensor.empty(%d0, %d1) : tensor<?x?xi32>
%2:2 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> ()>,
// CHECK: linalg.generic
func.func @no_fusion_missing_reduction_shape(%arg0: tensor<f32>, %arg1: index) -> tensor<?xf32> {
%cst = arith.constant 0xFF800000 : f32
- %4 = linalg.init_tensor [%arg1, %arg1] : tensor<?x?xf32>
+ %4 = tensor.empty(%arg1, %arg1) : tensor<?x?xf32>
%5 = linalg.generic {
indexing_maps = [#map0, #map1],
iterator_types = ["parallel", "parallel"]
^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
} -> tensor<?x?xf32>
- %6 = linalg.init_tensor [%arg1] : tensor<?xf32>
+ %6 = tensor.empty(%arg1) : tensor<?xf32>
%7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<?xf32>) -> tensor<?xf32>
%8 = linalg.generic {
indexing_maps = [#map2, #map3],
%23 = arith.index_cast %22 : index to i64
linalg.yield %23 : i64
} -> tensor<5000xi64>
- %1 = linalg.init_tensor [5000] : tensor<5000xi32>
+ %1 = tensor.empty() : tensor<5000xi32>
%2 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>],
iterator_types = ["parallel", "parallel"]}
// CHECK: func @fusion_different_axes(
// CHECK-SAME: %[[ARG0:.+]]: tensor<5000xi64>
// CHECK-SAME: %[[ARG1:.+]]: tensor<5000xi32>
-// CHECK-DAG: %[[INIT0:.+]] = linalg.init_tensor [5000] : tensor<5000xi64>
-// CHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [5000] : tensor<5000xi32>
+// CHECK-DAG: %[[INIT0:.+]] = tensor.empty() : tensor<5000xi64>
+// CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<5000xi32>
// CHECK: %[[RESULT:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]]
// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] :
%c0 = arith.constant 0 : index
%cst = arith.constant 7.0 : f32
%0 = tensor.dim %arg0, %c0 : tensor<?xf32>
- %1 = linalg.init_tensor [%0] : tensor<?xf32>
+ %1 = tensor.empty(%0) : tensor<?xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<?xf32>) -> tensor<?xf32>
- %3 = linalg.init_tensor [%0] : tensor<?xf32>
+ %3 = tensor.empty(%0) : tensor<?xf32>
%4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor<?xf32>, tensor<?xf32>) outs (%3:tensor<?xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
%5 = arith.addf %arg1, %arg2 : f32
%cst2 = arith.constant 6.0 : f32
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
%3 = linalg.fill ins(%cst1 : f32) outs(%2 : tensor<?x?xf32>) -> tensor<?x?xf32>
- %4 = linalg.init_tensor [%1, %0] : tensor<?x?xf32>
+ %4 = tensor.empty(%1, %0) : tensor<?x?xf32>
%5 = linalg.fill ins(%cst2 : f32) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
- %6 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+ %6 = tensor.empty(%0, %1) : tensor<?x?xf32>
%7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor<?x?xf32>, tensor<?x?xf32>) outs (%6:tensor<?x?xf32>) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
%8 = arith.divf %arg1, %arg2 : f32
#map = affine_map<() -> ()>
module {
func.func @fuse_multi_result_producer(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<f32>, %arg3: tensor<f32>, %arg4: tensor<f32>) -> tensor<f32> {
- %0 = linalg.init_tensor [] : tensor<f32>
- %1 = linalg.init_tensor [] : tensor<f32>
+ %0 = tensor.empty() : tensor<f32>
+ %1 = tensor.empty() : tensor<f32>
%2:2 = linalg.generic {
indexing_maps = [#map, #map, #map, #map, #map], iterator_types = []}
ins(%arg0, %arg1, %arg1 : tensor<f32>, tensor<f32>, tensor<f32>) outs(%0, %1 : tensor<f32>, tensor<f32>) {
// CHECK-LABEL: func.func @fuse_multi_result_producer
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<f32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<f32>
-// CHECK: %[[INIT:.+]] = linalg.init_tensor
+// CHECK: %[[INIT:.+]] = tensor.empty
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] :
// CHECK-SAME: outs(%[[INIT]] :
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%0 = linalg.generic #binary2Dpointwise
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%init : tensor<?x?xf32>) {
// CHECK-LABEL: func @reshape_multiple
// CHECK-SAME: (%[[A:.*]]: tensor<12544x16xf32>, %[[B:.*]]: tensor<12544x16xf32>, %[[C:.*]]: tensor<16xf32>)
-// CHECK: %[[I:.*]] = linalg.init_tensor [112, 112, 16] : tensor<112x112x16xf32>
+// CHECK: %[[I:.*]] = tensor.empty() : tensor<112x112x16xf32>
// CHECK: %[[RI:.*]] = tensor.collapse_shape %[[I]] {{\[}}[0, 1], [2]] : tensor<112x112x16xf32> into tensor<12544x16xf32>
// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP2]], #[[$MAP3]], #[[$MAP2]]],
// CHECK-SAME: iterator_types = ["parallel", "parallel"]}
: tensor<12544x16xf32> into tensor<112x112x16xf32>
%1 = tensor.expand_shape %B [[0, 1], [2]]
: tensor<12544x16xf32> into tensor<112x112x16xf32>
- %2 = linalg.init_tensor [112, 112, 16] : tensor<112x112x16xf32>
+ %2 = tensor.empty() : tensor<112x112x16xf32>
%3 = linalg.generic {indexing_maps = [
affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
func.func @reshape_negative(%A: tensor<12544x16xf32>, %B: tensor<112xf32>) -> tensor<112x112x16xf32> {
%20 = tensor.expand_shape %A [[0, 1], [2]]
: tensor<12544x16xf32> into tensor<112x112x16xf32>
- %21 = linalg.init_tensor [112, 112, 16] : tensor<112x112x16xf32>
+ %21 = tensor.empty() : tensor<112x112x16xf32>
%22 = linalg.generic {indexing_maps = [
affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d1)>,
affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
%cst_8 = arith.constant 1.1920929E-7 : f32
%25 = tensor.expand_shape %arg0 [[0, 1], [2]]
: tensor<6x5xi32> into tensor<2x3x5xi32>
- %26 = linalg.init_tensor [2, 3, 5] : tensor<2x3x5xf32>
+ %26 = tensor.empty() : tensor<2x3x5xf32>
%28 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d2)>,
// CHECK-LABEL: func @generalize_pad_tensor_static_shape(
// CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
// CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[INIT:.*]] = linalg.init_tensor [1, 32, 32, 1] : tensor<1x32x32x1xf32>
+// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32>
// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32>
// CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32>
// CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32>
// CHECK: %[[OUT_DIM2:.*]] = arith.addi %[[OFFSET]], %[[C2]] : index
// CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
// CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index
-// CHECK: %[[INIT:.*]] = linalg.init_tensor [4, %[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]] : tensor<4x?x?x?xf32>
+// CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32>
// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32>
// CHECK: %[[DIM1_1:.*]] = tensor.dim %[[IN]], %[[C1]] : tensor<4x?x2x?xf32>
// CHECK: %[[DIM3_1:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
// CHECK: func @inline_zerod(%[[ARG:.*]]: tensor<4xf32>, %[[SCALAR:.*]]: tensor<f32>)
func.func @inline_zerod(%arg0: tensor<4xf32>, %scalar: tensor<f32>) -> tensor<4xf32> {
- %0 = linalg.init_tensor [4] : tensor<4xf32>
+ %0 = tensor.empty() : tensor<4xf32>
// CHECK: linalg.generic {indexing_maps = [#[[MAP]], #[[MAP]]],
// CHECK-SAME: iterator_types = ["parallel"]} ins(%[[ARG]] : tensor<4xf32>)
%1 = linalg.generic {indexing_maps = [#map2, #map3, #map2],
// CHECK: func @inline_oned(%[[ARG:.*]]: tensor<4xf32>, %[[SCALAR:.*]]: tensor<1xf32>)
func.func @inline_oned(%arg0: tensor<4xf32>, %scalar: tensor<1xf32>) -> tensor<4xf32> {
// CHECK: %[[ZERO:.*]] = arith.constant 0 : index
- %0 = linalg.init_tensor [4] : tensor<4xf32>
+ %0 = tensor.empty() : tensor<4xf32>
// CHECK: linalg.generic {indexing_maps = [#[[MAP]], #[[MAP]]],
// CHECK-SAME: iterator_types = ["parallel"]} ins(%[[ARG]] : tensor<4xf32>)
%1 = linalg.generic {indexing_maps = [#map2, #map3, #map2],
// -----
-func.func @init_tensor_err(%arg0 : index, %arg1 : index)
-{
- // expected-error @+1 {{specified type 'tensor<4x?x?x5xf32>' does not match the inferred type 'tensor<4x5x?x?xf32>'}}
- %1 = linalg.init_tensor [4, 5, %arg0, %arg1] : tensor<4x?x?x5xf32>
- return
-}
-
-// -----
-
-func.func @init_tensor_err(%arg0 : index)
-{
- // expected-error @+1 {{expected 4 sizes values}}
- %1 = linalg.init_tensor [4, 5, %arg0] : tensor<4x?x?x5xf32>
- return
-}
-
-// -----
-
-func.func @init_tensor_err(%arg0 : index)
-{
- // expected-error @+1 {{expected 2 dynamic sizes values}}
- %1 = "linalg.init_tensor"(%arg0) {static_sizes = [4, -1, -1, 5]} : (index) -> tensor<4x?x?x5xf32>
- return
-}
-
-// -----
-
func.func @illegal_fill_tensor_no_return(%arg0 : index, %arg1 : index, %arg2 : f32)
{
- %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
+ %0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
// expected-error @+1 {{expected the number of results (0) to be equal to the number of output tensors (1)}}
linalg.fill ins(%arg2 : f32) outs(%0 : tensor<?x?xf32>)
}
// CHECK: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32>
// CHECK: %[[CTE:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[TMP:.+]] = linalg.init_tensor [1, 32, 32, 1] : tensor<1x32x32x1xf32>
+// CHECK: %[[TMP:.+]] = tensor.empty() : tensor<1x32x32x1xf32>
// CHECK: %[[R1c:.+]] = linalg.fill
// CHECK: %[[R2c:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP4]], #[[$MAP5]]]
// CHECK-LABEL: func @depthwise_conv_1d_nwc_wcm
func.func @depthwise_conv_1d_nwc_wcm(%input: tensor<1x12x8xf32>, %filter: tensor<3x8x8xf32>) -> tensor<1x10x8x8xf32> {
%zero = arith.constant 0.000000e+00 : f32
- %init = linalg.init_tensor [1, 10, 8, 8] : tensor<1x10x8x8xf32>
+ %init = tensor.empty() : tensor<1x10x8x8xf32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
// CHECK: depthwise_conv_1d_nwc_wcm
%0 = linalg.depthwise_conv_1d_nwc_wcm {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
// CHECK-LABEL: func @depthwise_conv_1d_nwc_wc
func.func @depthwise_conv_1d_nwc_wc(%input: tensor<1x12x8xf32>, %filter: tensor<3x8xf32>) -> tensor<1x10x8xf32> {
%zero = arith.constant 0.000000e+00 : f32
- %init = linalg.init_tensor [1, 10, 8] : tensor<1x10x8xf32>
+ %init = tensor.empty() : tensor<1x10x8xf32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
// CHECK: depthwise_conv_1d_nwc_wc
%0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
// CHECK-LABEL: func @depthwise_conv_2d_nhwc_hwcm_tensor
func.func @depthwise_conv_2d_nhwc_hwcm_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> {
%zero = arith.constant 0.000000e+00 : f32
- %init = linalg.init_tensor [2, 3, 4, 2, 3] : tensor<2x3x4x2x3xf32>
+ %init = tensor.empty() : tensor<2x3x4x2x3xf32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm
// CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-LABEL: func @depthwise_conv_1d_nw_tensor
func.func @depthwise_conv_1d_nw_tensor(%input: tensor<1x113x96xf32>, %filter: tensor<3x96xf32>) -> tensor<1x56x96xf32> {
- %init = linalg.init_tensor [1, 56, 96] : tensor<1x56x96xf32>
+ %init = tensor.empty() : tensor<1x56x96xf32>
// CHECK: %{{.+}} = linalg.depthwise_conv_1d_nw
// CHECK-SAME: {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x96xf32>, tensor<3x96xf32>)
// CHECK-LABEL: func @depthwise_conv_2d_nhwc_hwc_tensor
func.func @depthwise_conv_2d_nhwc_hwc_tensor(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> {
- %init = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
+ %init = tensor.empty() : tensor<1x56x56x96xf32>
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwc
// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>)
// CHECK-LABEL: func @depthwise_conv_2d_nchw_chw_tensor
func.func @depthwise_conv_2d_nchw_chw_tensor(%input: tensor<1x96x113x113xf32>, %filter: tensor<96x3x3xf32>) -> tensor<1x96x56x56xf32> {
- %init = linalg.init_tensor [1, 96, 56, 56] : tensor<1x96x56x56xf32>
+ %init = tensor.empty() : tensor<1x96x56x56xf32>
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nchw_chw
// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x96x113x113xf32>, tensor<96x3x3xf32>)
func.func @depthwise_conv_2d_nhwc_hwcm_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> {
%zero = arith.constant 0.000000e+00 : f32
- %init = linalg.init_tensor [2, 6, 7, 2, 3] : tensor<2x6x7x2x3xf32>
+ %init = tensor.empty() : tensor<2x6x7x2x3xf32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm
// CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-LABEL: func @depthwise_conv_3d_ndhwc_dhwcm
func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> {
%zero = arith.constant 0.000000e+00 : f32
- %init = linalg.init_tensor [2, 3, 13, 4, 6, 6] : tensor<2x3x13x4x6x6xf32>
+ %init = tensor.empty() : tensor<2x3x13x4x6x6xf32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
// CHECK: depthwise_conv_3d_ndhwc_dhwcm
%0 = linalg.depthwise_conv_3d_ndhwc_dhwcm {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
// CHECK-LABEL: func @depthwise_conv_3d_ndhwc_dhwc
func.func @depthwise_conv_3d_ndhwc_dhwc(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6xf32>) -> tensor<2x3x13x4x6xf32> {
%zero = arith.constant 0.000000e+00 : f32
- %init = linalg.init_tensor [2, 3, 13, 4, 6] : tensor<2x3x13x4x6xf32>
+ %init = tensor.empty() : tensor<2x3x13x4x6xf32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
// CHECK: depthwise_conv_3d_ndhwc_dhwc
%0 = linalg.depthwise_conv_3d_ndhwc_dhwc {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
func.func @pooling_nhwc_sum_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
- %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
- %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32>
+ %fake = tensor.empty() : tensor<3x3xf32>
+ %init = tensor.empty() : tensor<1x2x2x1xf32>
%cst = arith.constant 0.000000e+00 : f32
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
%res = linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
func.func @pooling_nchw_sum_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
- %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
- %init = linalg.init_tensor [1, 1, 2, 2] : tensor<1x1x2x2xf32>
+ %fake = tensor.empty() : tensor<3x3xf32>
+ %init = tensor.empty() : tensor<1x1x2x2xf32>
%cst = arith.constant 0.000000e+00 : f32
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
%res = linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
func.func @pooling_nhwc_max_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
- %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
- %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32>
+ %fake = tensor.empty() : tensor<3x3xf32>
+ %init = tensor.empty() : tensor<1x2x2x1xf32>
%cst = arith.constant 0.000000e+00 : f32
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
%res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
func.func @pooling_nchw_max_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
- %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
- %init = linalg.init_tensor [1, 1, 2, 2] : tensor<1x1x2x2xf32>
+ %fake = tensor.empty() : tensor<3x3xf32>
+ %init = tensor.empty() : tensor<1x1x2x2xf32>
%cst = arith.constant 0.000000e+00 : f32
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
%res = linalg.pooling_nchw_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi8>, tensor<3x3xi8>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
func.func @pooling_nhwc_i8_max_tensor(%input: tensor<1x4x4x1xi8>) -> tensor<1x2x2x1xi8> {
- %fake = linalg.init_tensor [3, 3] : tensor<3x3xi8>
- %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi8>
+ %fake = tensor.empty() : tensor<3x3xi8>
+ %init = tensor.empty() : tensor<1x2x2x1xi8>
%cst = arith.constant 0 : i8
%fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
%res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi16>, tensor<3x3xi16>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
func.func @pooling_nhwc_i16_max_tensor(%input: tensor<1x4x4x1xi16>) -> tensor<1x2x2x1xi16> {
- %fake = linalg.init_tensor [3, 3] : tensor<3x3xi16>
- %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi16>
+ %fake = tensor.empty() : tensor<3x3xi16>
+ %init = tensor.empty() : tensor<1x2x2x1xi16>
%cst = arith.constant 0 : i16
%fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
%res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi32>, tensor<3x3xi32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
func.func @pooling_nhwc_i32_max_tensor(%input: tensor<1x4x4x1xi32>) -> tensor<1x2x2x1xi32> {
- %fake = linalg.init_tensor [3, 3] : tensor<3x3xi32>
- %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi32>
+ %fake = tensor.empty() : tensor<3x3xi32>
+ %init = tensor.empty() : tensor<1x2x2x1xi32>
%cst = arith.constant 0 : i32
%fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
%res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
func.func @pooling_nhwc_min_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
- %fake = linalg.init_tensor [3, 3] : tensor<3x3xf32>
- %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32>
+ %fake = tensor.empty() : tensor<3x3xf32>
+ %init = tensor.empty() : tensor<1x2x2x1xf32>
%cst = arith.constant 0.000000e+00 : f32
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
%res = linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
func.func @pooling_ndhwc_sum_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
- %fake = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
- %init = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
+ %fake = tensor.empty() : tensor<3x3x3xf32>
+ %init = tensor.empty() : tensor<1x2x2x2x1xf32>
%cst = arith.constant 0.000000e+00 : f32
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
%res = linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
func.func @pooling_ndhwc_max_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
- %fake = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
- %init = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
+ %fake = tensor.empty() : tensor<3x3x3xf32>
+ %init = tensor.empty() : tensor<1x2x2x2x1xf32>
%cst = arith.constant 0.000000e+00 : f32
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
%res = linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
func.func @pooling_ndhwc_min_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
- %fake = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
- %init = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
+ %fake = tensor.empty() : tensor<3x3x3xf32>
+ %init = tensor.empty() : tensor<1x2x2x2x1xf32>
%cst = arith.constant 0.000000e+00 : f32
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
%res = linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
// RUN: mlir-opt %s -eliminate-alloc-tensors -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
//===----------------------------------------------------------------------===//
-// InitTensorOp elimination
+// AllocTensorOp elimination
//===----------------------------------------------------------------------===//
// CHECK-LABEL: func @buffer_forwarding_conflict
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
// CHECK-DAG: %[[TARGET_D0:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG3]], %[[SOURCE_D0]]]
// CHECK-DAG: %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK-DAG: %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[SOURCE_D1]]]
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[TARGET_D0]], %[[TARGET_D1]]]
+// CHECK: %[[INIT:.+]] = tensor.empty(%[[TARGET_D0]], %[[TARGET_D1]])
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}outs(%[[INIT]]
// CHECK-DAG: %[[SIZE_D0:.+]] = tensor.dim %[[SOURCE]], %[[C0]]
// CHECK-DAG: %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
%arg3 : f32) -> tensor<49x?xf32> {
%c0 = arith.constant 0 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x42xf32>
- %init = linalg.init_tensor [42, %d0] : tensor<42x?xf32>
+ %init = tensor.empty(%d0) : tensor<42x?xf32>
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
iterator_types = ["parallel", "parallel"]}
// CHECK-DAG: %[[SOURCE:.+]] = linalg.generic
// CHECK-DAG: %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK-DAG: %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]], %[[SOURCE_D1]]]
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [49, %[[TARGET_D1]]]
+// CHECK: %[[INIT:.+]] = tensor.empty(%[[TARGET_D1]])
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG3]]{{.*}}outs(%[[INIT]]
// CHECK-DAG: %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
%0 = tensor.collapse_shape %arg0 [[0, 1], [2]] : tensor<?x?x?xf32> into tensor<?x?xf32>
%d0 = tensor.dim %0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %0, %c1 : tensor<?x?xf32>
- %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%1 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
%cst = arith.constant 0.0 : f32
%d0 = tensor.dim %arg0, %c1 : tensor<1x?x?xf32>
%d1 = tensor.dim %arg1, %c2 : tensor<1x?x?xf32>
- %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%fill = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
func.func @generic_op_reshape_consumer_static(%arg0: tensor<264x4xf32>)
-> tensor<8x33x4xf32> {
%cst = arith.constant dense<2.000000e+00> : tensor<264x4xf32>
- %0 = linalg.init_tensor [264, 4] : tensor<264x4xf32>
+ %0 = tensor.empty() : tensor<264x4xf32>
%1 = linalg.generic {
indexing_maps = [#map0, #map0, #map0],
iterator_types = ["parallel", "parallel"]}
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<264x4xf32>
// CHECK-DAG: %[[CST:.+]] = arith.constant
// CHECK-SAME: : tensor<8x33x4xf32>
-// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [264, 4]
+// CHECK-DAG: %[[INIT:.+]] = tensor.empty()
// CHECK: %[[T0:.+]] = tensor.expand_shape %[[ARG0]]
// CHECK-SAME: [0, 1], [2]
// CHECK-SAME: tensor<264x4xf32> into tensor<8x33x4xf32>
func.func @reshape_as_consumer_permutation
(%a : tensor<210x6x4xi32>, %b : tensor<210x4xi32>)
-> tensor<2x3x4x5x6x7xi32> {
- %shape = linalg.init_tensor [6, 4, 210] : tensor<6x4x210xi32>
+ %shape = tensor.empty() : tensor<6x4x210xi32>
%c = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0, d2)>,
affine_map<(d0, d1, d2) -> (d1, d2)>,
// CHECK: func @reshape_as_consumer_permutation
// CHECK-SAME: %[[ARG0:.+]]: tensor<210x6x4xi32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<210x4xi32>
-// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [6, 4, 210]
+// CHECK-DAG: %[[INIT:.+]] = tensor.empty()
// CHECK-DAG: %[[T1:.+]] = tensor.expand_shape %[[ARG0]]
// CHECK-SAME: [0, 1, 2], [3, 4], [5]
// CHECK-DAG: %[[T2:.+]] = tensor.expand_shape %[[ARG1]]
%c0 = arith.constant 0 : index
%0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<?x?xf32> into tensor<?xf32>
%1 = tensor.dim %0, %c0 : tensor<?xf32>
- %2 = linalg.init_tensor [%1] : tensor<?xf32>
+ %2 = tensor.empty(%1) : tensor<?xf32>
%3 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
func.func @no_fuse_mismatched_dynamism(%arg0: tensor<2x1xi64>, %arg1: tensor<?xi64>) -> tensor<2xi64> {
%0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<2x1xi64> into tensor<2xi64>
- %1 = linalg.init_tensor [2] : tensor<2xi64>
+ %1 = tensor.empty() : tensor<2xi64>
%2 = linalg.generic
{indexing_maps = [affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>,
// RUN: mlir-opt -resolve-shaped-type-result-dims -split-input-file %s | FileCheck %s
-func.func @init_tensor_static_dim() -> (index, index) {
+func.func @empty_tensor_static_dim() -> (index, index) {
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%c6 = arith.constant 6 : index
- %0 = linalg.init_tensor [4, 5, %c6] : tensor<4x5x?xf32>
+ %0 = tensor.empty(%c6) : tensor<4x5x?xf32>
%1 = tensor.dim %0, %c2 : tensor<4x5x?xf32>
%2 = tensor.dim %0, %c0 : tensor<4x5x?xf32>
return %1, %2 : index, index
}
-// CHECK: func @init_tensor_static_dim
+// CHECK: func @empty_tensor_static_dim
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index
// CHECK: return %[[C6]], %[[C4]]
// -----
-func.func @init_tensor_dynamic_dim(%arg0 : index) -> (index) {
+func.func @empty_tensor_dynamic_dim(%arg0 : index) -> (index) {
%c2 = arith.constant 2 : index
- %0 = linalg.init_tensor [4, 5, %arg0] : tensor<4x5x?xf32>
+ %0 = tensor.empty(%arg0) : tensor<4x5x?xf32>
%1 = tensor.dim %0, %c2 : tensor<4x5x?xf32>
return %1 : index
}
-// CHECK: func @init_tensor_dynamic_dim
+// CHECK: func @empty_tensor_dynamic_dim
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index
// CHECK: return %[[ARG0]]
// -----
-func.func @init_tensor_dynamic_dim2(%arg0 : index, %arg1 : index) -> (index, index) {
+func.func @empty_tensor_dynamic_dim2(%arg0 : index, %arg1 : index) -> (index, index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
- %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
+ %0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
%1 = tensor.dim %0, %c0 : tensor<?x?xf32>
%2 = tensor.dim %0, %c1 : tensor<?x?xf32>
return %1, %2 : index, index
}
-// CHECK: func @init_tensor_dynamic_dim2
+// CHECK: func @empty_tensor_dynamic_dim2
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index
// CHECK: return %[[ARG0]], %[[ARG1]]
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?xf32>
- %0 = linalg.init_tensor [%d0, %arg1] : tensor<?x?xf32>
+ %0 = tensor.empty(%d0, %arg1) : tensor<?x?xf32>
%1 = linalg.generic
{indexing_maps = [affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?xf32>
- %0 = linalg.init_tensor [%d0, %arg1] : tensor<?x?xf32>
+ %0 = tensor.empty(%d0, %arg1) : tensor<?x?xf32>
%1 = linalg.generic
{indexing_maps = [affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
#map = affine_map<(d0) -> (d0)>
-func.func @init_tensor_dim_of_linalg_result(%arg_0 : tensor<?xf32>,
+func.func @empty_tensor_dim_of_linalg_result(%arg_0 : tensor<?xf32>,
%arg_1: tensor<?xf32>) -> (index, index) {
%0, %1 = linalg.generic {
indexing_maps = [#map, #map, #map],
%num_elem_1 = tensor.dim %1, %c0 : tensor<?xf32>
return %num_elem_0, %num_elem_1 : index, index
}
-// CHECK: func @init_tensor_dim_of_linalg_result(
+// CHECK: func @empty_tensor_dim_of_linalg_result(
// CHECK-SAME: %[[ARG_0:[a-zA-Z0-9_]+]]: tensor<?xf32>
// CHECK-SAME: %[[ARG_1:[a-zA-Z0-9_]+]]: tensor<?xf32>)
// CHECK: %[[R0:.+]] = tensor.dim %[[ARG_0]]
%arg0: tensor<?xi32>, %arg1: tensor<?xi32>, %arg2: i32)
-> (tensor<i32>, tensor<i32>) {
%c0 = arith.constant 0 : index
- %0 = linalg.init_tensor [] : tensor<i32>
+ %0 = tensor.empty() : tensor<i32>
%1 = linalg.fill ins(%arg2 : i32) outs(%0 : tensor<i32>) -> tensor<i32>
- %2 = linalg.init_tensor [] : tensor<i32>
+ %2 = tensor.empty() : tensor<i32>
%3 = linalg.fill ins(%arg2 : i32) outs(%2 : tensor<i32>) -> tensor<i32>
%4:2 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>, affine_map<(d0) -> ()>],
// -----
-#attr = {"foo"}
-func.func @init_tensor(%arg0 : index, %arg1 : index)
-{
- %0 = linalg.init_tensor [3, 42] : tensor<3x42xf32>
- %1 = linalg.init_tensor [4, %arg0, %arg1, 5] : tensor<4x?x?x5xf32>
- %2 = linalg.init_tensor [2, 2] : tensor<2x2xf32, #attr>
- return
-}
-// CHECK-LABEL: func @init_tensor
-// CHECK: linalg.init_tensor [3, 42] : tensor<3x42xf32>
-// CHECK: linalg.init_tensor [4, %{{.*}}, %{{.*}}, 5] : tensor<4x?x?x5xf32>
-// CHECK: linalg.init_tensor [2, 2] : tensor<2x2xf32, {foo}>
-
-// -----
-
func.func @fill_tensor(%arg0 : index, %arg1 : index, %arg2 : f32) -> tensor<?x?xf32> {
- %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
+ %0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
%1 = linalg.fill ins(%arg2 : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %1 : tensor<?x?xf32>
}
// CHECK-DAG: %[[ID:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x4x64xf32>
// CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<4x64x32xf32>
-// CHECK-DAG: %[[INI:.*]] = linalg.init_tensor [16, 32, 4] : tensor<16x32x4xf32>
+// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: , iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
// INNERPARALLELCHECK-DAG: %[[ID:.*]] = arith.constant 0.000000e+00 : f32
// INNERPARALLELCHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x64x4xf32>
// INNERPARALLELCHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<64x4x32xf32>
-// INNERPARALLELCHECK-DAG: %[[INI:.*]] = linalg.init_tensor [16, 32, 4] : tensor<16x32x4xf32>
+// INNERPARALLELCHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
// INNERPARALLELCHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
// INNERPARALLELCHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
// INNERPARALLELCHECK-SAME: , iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
//CHECK-LABEL: @generic_split_1d
// CHECK: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
// CHECK: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
-// CHECK: %[[INI:.*]] = linalg.init_tensor [4] : tensor<4xf32>
+// CHECK: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
// CHECK: %[[G:.*]] = linalg.generic
// CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
//INNERPARALLELCHECK-LABEL: @generic_split_1d
// INNERPARALLELCHECK: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
// INNERPARALLELCHECK: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<8x4xf32>
-// INNERPARALLELCHECK: %[[INI:.*]] = linalg.init_tensor [4] : tensor<4xf32>
+// INNERPARALLELCHECK: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
// INNERPARALLELCHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
// INNERPARALLELCHECK: %[[G:.*]] = linalg.generic
// INNERPARALLELCHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
// CHECK: %[[ID:.*]] = arith.constant -3.40282347E+38 : f32
// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<4x8x2xf32>
// CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x4x8xf32>
-// CHECK: %[[INI:.*]] = linalg.init_tensor [5, 2, 4] : tensor<5x2x4xf32>
+// CHECK: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
// INNERPARALLELCHECK: %[[ID:.*]] = arith.constant -3.40282347E+38 : f32
// INNERPARALLELCHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32>
// INNERPARALLELCHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32>
-// INNERPARALLELCHECK: %[[INI:.*]] = linalg.init_tensor [5, 2, 4] : tensor<5x2x4xf32>
+// INNERPARALLELCHECK: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
// INNERPARALLELCHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
// INNERPARALLELCHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
// INNERPARALLELCHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
- %init = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
+ %init = tensor.empty() : tensor<1x112x112x32xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%conv = linalg.conv_2d_nhwc_hwcf
// CHECK: func @conv_tensors_static
// CHECK-SAME: (%[[INPUT:.+]]: tensor<1x225x225x3xf32>, %[[FILTER:.+]]: tensor<3x3x3x32xf32>, %[[ELEM:.+]]: tensor<1x112x112x32xf32>)
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
+// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x112x112x32xf32>
// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
// CHECK-NEXT: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG0:.+]] = %[[FILL]])
%ow = tensor.dim %elementwise, %c2 : tensor<?x?x?x?xf32>
%oc = tensor.dim %elementwise, %c3 : tensor<?x?x?x?xf32>
- %init = linalg.init_tensor [%n, %oh, %ow, %oc] : tensor<?x?x?x?xf32>
+ %init = tensor.empty(%n, %oh, %ow, %oc) : tensor<?x?x?x?xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
%conv = linalg.conv_2d_nhwc_hwcf
// CHECK-DAG: %[[ELEM_OW:.+]] = tensor.dim %[[ELEM]], %[[C2]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[ELEM_OC:.+]] = tensor.dim %[[ELEM]], %[[C3]] : tensor<?x?x?x?xf32>
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]] : tensor<?x?x?x?xf32>
+// CHECK: %[[INIT:.+]] = tensor.empty(%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]) : tensor<?x?x?x?xf32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-PEEL-12: }
func.func @matmul_static_tensor(%arg0: tensor<1500x1600xf32>, %arg1: tensor<1600x1700xf32>)
-> tensor<1500x1700xf32> {
- %out = linalg.init_tensor [1500, 1700] : tensor<1500x1700xf32>
+ %out = tensor.empty() : tensor<1500x1700xf32>
%r = linalg.matmul {__internal_linalg_transform__ = "tile"}
ins(%arg0, %arg1: tensor<1500x1600xf32>, tensor<1600x1700xf32>)
outs(%out: tensor<1500x1700xf32>) -> tensor<1500x1700xf32>
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
- %out = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %out = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%r = linalg.matmul {__internal_linalg_transform__ = "tile"}
ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
outs(%out: tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y
// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x
-// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor
+// CHECK-DAG: %[[INIT:.+]] = tensor.empty
// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDY]], %[[C8]]]
// CHECK: %[[LBY:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
// CHECK: %[[STEPY:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSY]], %[[C8]]]
%cst = arith.constant 0.0 : f32
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
- %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
+ %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
%3 = linalg.fill ins(%cst : f32) outs(%2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%4 = linalg.matmul {__internal_linalg_transform__ = "tensors_fuse_distribute1"}
ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
- %out = linalg.init_tensor [%d0, 2000] : tensor<?x2000xf32>
+ %out = tensor.empty(%d0) : tensor<?x2000xf32>
%r = linalg.matmul {__internal_linalg_transform__ = "tile"}
ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x2000xf32>)
outs(%out: tensor<?x2000xf32>) -> tensor<?x2000xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
%2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
- %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
+ %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
%4 = linalg.generic
{indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d0, d2, d1)>,
// CHECK-LABEL: func @generic_op_tensors
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
-// CHECK: %[[INIT:.+]] = linalg.init_tensor
+// CHECK: %[[INIT:.+]] = tensor.empty
// CHECK: %[[TD0:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC0:.+]] = %[[INIT]]) -> (tensor<?x?x?xf32>) {
// CHECK: %[[TD1:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC1:.+]] = %[[TC0]]) -> (tensor<?x?x?xf32>) {
// CHECK: %[[TD2:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC2:.+]] = %[[TC1]]) -> (tensor<?x?x?xf32>) {
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x113x96xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<1x3x96xf32>
func.func @depthwise_conv_2d_nhwc_hwc(%input: tensor<1x1x113x96xf32>, %filter: tensor<1x3x96xf32>) -> tensor<1x1x56x96xf32> {
- // CHECK: %[[RES:.+]] = linalg.init_tensor
- %init = linalg.init_tensor [1, 1, 56, 96] : tensor<1x1x56x96xf32>
+ // CHECK: %[[RES:.+]] = tensor.empty
+ %init = tensor.empty() : tensor<1x1x56x96xf32>
// CHECK: %[[SLICE0:.+]] = tensor.extract_slice %[[ARG0]]
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG1]]
// CHECK: %[[SLICERES:.+]] = tensor.extract_slice %[[RES]]
// CHECK-SAME: %[[IN:[0-9a-z]+]]: tensor<64xf32>
// CHECK-SAME: %[[OUT:[0-9a-z]+]]: tensor<64xf32>
func.func @fuse_untileable_op(%arg0: index, %arg1: tensor<64xf32>, %arg2: tensor<64xf32>) -> tensor<64xf32> {
- %0 = linalg.init_tensor [%arg0] : tensor<?xf32>
+ %0 = tensor.empty(%arg0) : tensor<?xf32>
%1 = affine.apply #map0()[%arg0]
// CHECK: scf.foreach_thread {{.*}} {
%2 = scf.foreach_thread (%arg3) in (%1) shared_outs(%o = %arg2) -> (tensor<64xf32>) {
- // CHECK: %[[INIT_TENSOR:.*]] = linalg.init_tensor
+ // CHECK: %[[INIT_TENSOR:.*]] = tensor.empty
%3 = affine.apply #map1(%arg3)[%arg0]
%4 = affine.min #map2(%arg3)[%arg0]
%5 = tensor.extract_slice %o[%3] [%4] [1] : tensor<64xf32> to tensor<?xf32>
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["linalg.init_tensor"]} in %arg1
+ %0 = transform.structured.match ops{["tensor.empty"]} in %arg1
%1 = transform.structured.match ops{["scf.foreach_thread"]} in %arg1
- // linalg.init_tensor is not tileable. The op is cloned and fused.
+ // tensor.empty is not tileable. The op is cloned and fused.
transform.structured.fuse_into_containing_op %0 into %1
}
}
// CHECK-SAME: (%[[INPUT:.+]]: tensor<12x7x25xf32>)
func.func @interchange_reduction(%input: tensor<12x7x25xf32>) -> tensor<12x25xf32> {
%five = arith.constant 5.0 : f32
- %init = linalg.init_tensor [12, 25] : tensor<12x25xf32>
+ %init = tensor.empty() : tensor<12x25xf32>
-// CHECK: %[[INIT:.+]] = linalg.init_tensor [12, 25]
+// CHECK: %[[INIT:.+]] = tensor.empty()
// CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index
// CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index
// CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[FOR_ARG0:.+]] = %[[INIT]])
#map1 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
func.func @match_complex_attribute(%arg0: tensor<12x128x32xf32>)
-> tensor<128x12x32xf32> {
- %0 = linalg.init_tensor [128, 12, 32] : tensor<128x12x32xf32>
+ %0 = tensor.empty() : tensor<128x12x32xf32>
// expected-remark @below {{matched complex attr}}
%1 = linalg.generic {indexing_maps = [#map0, #map1],
iterator_types = ["parallel", "parallel", "parallel"]}
func.func @generic_interchanged_transpose(%arg0: tensor<12x128x32xf32>) -> tensor<128x12x32xf32> {
// CHECK: %[[IN:.+]] = vector.transfer_read
// CHECK: vector.transfer_write %[[IN]], {{.+}} permutation_map = #[[MAP]]
- %0 = linalg.init_tensor [128, 12, 32] : tensor<128x12x32xf32>
+ %0 = tensor.empty() : tensor<128x12x32xf32>
%1 = linalg.generic {indexing_maps = [#map0, #map1],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%arg0 : tensor<12x128x32xf32>)
// CHECK-NOT: tensor.pad
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[INIT:.*]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
+// CHECK-DAG: %[[INIT:.*]] = tensor.empty() : tensor<2x3x4xf32>
// CHECK-DAG: %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x3x4xf32>
// CHECK: %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]]{{.*}} : vector<2x3x4xf32>, tensor<2x3x4xf32>
// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32>
// CHECK-NOT: tensor.pad
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK: %[[INIT:.*]] = linalg.init_tensor [2, 6, 4] : tensor<2x6x4xf32>
+// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<2x6x4xf32>
// CHECK: %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x6x4xf32>
// CHECK: %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<2x6x4xf32>, tensor<2x6x4xf32>
// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : tensor<2x5x2xf32>, vector<2x5x2xf32>
// CHECK: %[[DIM3:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
// CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index
// CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index
-// CHECK: %[[INIT:.*]] = linalg.init_tensor [6, %[[V1]], %[[V2]], %[[V5]]] : tensor<6x?x?x?xf32>
+// CHECK: %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32>
// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32>
// CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
// CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
// CHECK-LABEL: func @red_max_2d(
func.func @red_max_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
// CHECK: %[[CMINF:.+]] = arith.constant dense<-3.402820e+38> : vector<4xf32>
- // CHECK: linalg.init_tensor [4] : tensor<4xf32>
+ // CHECK: tensor.empty() : tensor<4xf32>
// CHECK: vector.multi_reduction <maxf>, {{.*}}, %[[CMINF]] [1] : vector<4x4xf32> to vector<4xf32>
// CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
%ident = arith.constant -3.40282e+38 : f32
- %init = linalg.init_tensor [4] : tensor<4xf32>
+ %init = tensor.empty() : tensor<4xf32>
%fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
// CHECK-LABEL: func @red_min_2d(
func.func @red_min_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
// CHECK: %[[CMAXF:.+]] = arith.constant dense<3.402820e+38> : vector<4xf32>
- // CHECK: linalg.init_tensor [4] : tensor<4xf32>
+ // CHECK: tensor.empty() : tensor<4xf32>
// CHECK: vector.transfer_read {{.*}} : tensor<4x4xf32>, vector<4x4xf32>
// CHECK: vector.multi_reduction <minf>, {{.*}}, %[[CMAXF]] [1] : vector<4x4xf32> to vector<4xf32>
// CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
%maxf32 = arith.constant 3.40282e+38 : f32
- %init = linalg.init_tensor [4] : tensor<4xf32>
+ %init = tensor.empty() : tensor<4xf32>
%fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
// CHECK-LABEL: func @red_mul_2d(
func.func @red_mul_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
- // CHECK: linalg.init_tensor [4] : tensor<4xf32>
+ // CHECK: tensor.empty() : tensor<4xf32>
// CHECK: vector.transfer_read {{.*}} : tensor<4x4xf32>, vector<4x4xf32>
// CHECK: vector.multi_reduction <mul>, {{.*}}, {{.*}} [1] : vector<4x4xf32> to vector<4xf32>
// CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
%ident = arith.constant 1.0 : f32
- %init = linalg.init_tensor [4] : tensor<4xf32>
+ %init = tensor.empty() : tensor<4xf32>
%fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
// CHECK-LABEL: func @red_or_2d(
func.func @red_or_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
- // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+ // CHECK: tensor.empty() : tensor<4xi1>
// CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
// CHECK: vector.multi_reduction <or>, {{.*}}, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
// CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
%ident = arith.constant false
- %init = linalg.init_tensor [4] : tensor<4xi1>
+ %init = tensor.empty() : tensor<4xi1>
%fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
// CHECK-LABEL: func @red_and_2d(
func.func @red_and_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
- // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+ // CHECK: tensor.empty() : tensor<4xi1>
// CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
// CHECK: vector.multi_reduction <and>, {{.*}}, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
// CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
%ident = arith.constant true
- %init = linalg.init_tensor [4] : tensor<4xi1>
+ %init = tensor.empty() : tensor<4xi1>
%fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
// CHECK-LABEL: func @red_xor_2d(
func.func @red_xor_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
- // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+ // CHECK: tensor.empty() : tensor<4xi1>
// CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
// CHECK: vector.multi_reduction <xor>, {{.*}}, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
// CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
%ident = arith.constant false
- %init = linalg.init_tensor [4] : tensor<4xi1>
+ %init = tensor.empty() : tensor<4xi1>
%fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
// CHECK: subf {{.*}} : vector<4x4xf32>
// CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32>
%c0 = arith.constant 0.0 : f32
- %init = linalg.init_tensor [4, 4] : tensor<4x4xf32>
+ %init = tensor.empty() : tensor<4x4xf32>
%fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, 0)>,
// CHECK: vector.multi_reduction <add>, {{.*}}, {{.*}} : vector<4x4xf32> to vector<4xf32>
// CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32>
%c0 = arith.constant 0.0 : f32
- %init = linalg.init_tensor [4] : tensor<4xf32>
+ %init = tensor.empty() : tensor<4xf32>
%fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, 0)>,
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
%f0 = arith.constant 0.000000e+00 : f32
- // CHECK: %[[init:.*]] = linalg.init_tensor [] : tensor<f32>
- %0 = linalg.init_tensor [] : tensor<f32>
+ // CHECK: %[[init:.*]] = tensor.empty() : tensor<f32>
+ %0 = tensor.empty() : tensor<f32>
%1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor<f32>) -> tensor<f32>
// CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]]
// CHECK-LABEL: func @not_projected_permutation
func.func @not_projected_permutation(%arg0: tensor<8x8xf32>) -> tensor<6x6x3x3xf32> {
%c0 = arith.constant 0.0 : f32
- %init = linalg.init_tensor [6, 6, 3, 3] : tensor<6x6x3x3xf32>
+ %init = tensor.empty() : tensor<6x6x3x3xf32>
%fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32>
// CHECK: linalg.generic
%result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 + d2, d1 + d3)>,
// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_INITTENSOR:.*]] = linalg.init_tensor [32] : tensor<32xf32>
+// CHECK: %[[VAL_INITTENSOR:.*]] = tensor.empty() : tensor<32xf32>
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref<?xf32>
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : memref<32xf32>
// CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>)
// CHECK: return %[[VAL_11]] : tensor<32xf32>
// CHECK: }
func.func @add_d_init(%arga: tensor<32xf32, #DV>, %argb: f32) -> tensor<32xf32> {
- %u = linalg.init_tensor [32] : tensor<32xf32>
+ %u = tensor.empty() : tensor<32xf32>
%0 = linalg.generic #trait1
ins(%arga: tensor<32xf32, #DV>)
outs(%u: tensor<32xf32>) {
// CHECK: }
func.func @fold_yield_arg_zero() -> tensor<1024x1024xf64> {
%cst = arith.constant 0.000000e+00 : f64
- %0 = linalg.init_tensor [1024, 1024] : tensor<1024x1024xf64>
+ %0 = tensor.empty() : tensor<1024x1024xf64>
%1 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> ()>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
// CHECK: }
func.func @fold_yield_direct_zero() -> tensor<32xf64> {
%cst = arith.constant 0.000000e+00 : f64
- %0 = linalg.init_tensor [32] : tensor<32xf64>
+ %0 = tensor.empty() : tensor<32xf64>
%1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
outs(%0 : tensor<32xf64>) {
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
-// CHECK-DAG: %[[VAL_10a:.*]] = linalg.init_tensor [8] : tensor<8xi64>
+// CHECK-DAG: %[[VAL_10a:.*]] = tensor.empty() : tensor<8xi64>
// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_10a]] : memref<8xi64>
// CHECK-DAG: linalg.fill ins(%[[VAL_5]] : i64) outs(%[[VAL_10]] : memref<8xi64>)
// CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK: return %[[VAL_20]] : tensor<8xi64>
// CHECK: }
func.func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> {
- %init = linalg.init_tensor [8] : tensor<8xi64>
+ %init = tensor.empty() : tensor<8xi64>
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
outs(%init: tensor<8xi64>) {
// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
-// CHECK-DAG: %[[VAL_9a:.*]] = linalg.init_tensor [8] : tensor<8xi64>
+// CHECK-DAG: %[[VAL_9a:.*]] = tensor.empty() : tensor<8xi64>
// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_9a]] : memref<8xi64>
// CHECK-DAG: linalg.fill ins(%[[VAL_3]] : i64) outs(%[[VAL_9]] : memref<8xi64>)
// CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: return %[[VAL_35]] : tensor<8xi64>
// CHECK: }
func.func @sparse_index_1d_disj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> {
- %init = linalg.init_tensor [8] : tensor<8xi64>
+ %init = tensor.empty() : tensor<8xi64>
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
outs(%init: tensor<8xi64>) {
%1 = tensor.insert_slice %0 into %input[%c0, 1, %c0, 0] [1, 1, 2, 4] [1, 1, 1, 1] : tensor<1x2x4xf32> into tensor<1x2x2x4xf32>
return %1: tensor<1x2x2x4xf32>
}
+
+// -----
+
+// Canonicalization: a tensor.empty whose dynamic size is a constant is
+// folded to a fully static tensor.empty, with a tensor.cast restoring the
+// original tensor<4x5x?xf32> type for the user (see the two expected ops below).
+func.func @empty_canonicalize() -> (tensor<4x5x?xf32>) {
+  %c6 = arith.constant 6 : index
+  %0 = tensor.empty(%c6) : tensor<4x5x?xf32>
+  return %0 : tensor<4x5x?xf32>
+}
+// CHECK: func @empty_canonicalize
+// CHECK: %[[T0:.+]] = tensor.empty() : tensor<4x5x6xf32>
+// CHECK: %[[T1:.+]] = tensor.cast %[[T0]] : tensor<4x5x6xf32> to tensor<4x5x?xf32>
+// CHECK: return %[[T1]]
+
+// -----
+
+// Canonicalization: tensor.expand_shape of a tensor.empty folds into a single
+// tensor.empty of the expanded type. The dynamic size of the expanded group
+// [4, ?, 7] is recomputed from the source size as s0 floordiv 28 (28 = 4 * 7),
+// per the affine map matched below.
+func.func @empty_reshape_expansion(%arg0 : index) -> tensor<2x3x5x4x?x7xf32> {
+  %0 = tensor.empty(%arg0) : tensor<6x5x?xf32>
+  %1 = tensor.expand_shape %0 [[0, 1], [2], [3, 4, 5]]
+      : tensor<6x5x?xf32> into tensor<2x3x5x4x?x7xf32>
+  return %1 : tensor<2x3x5x4x?x7xf32>
+}
+// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 floordiv 28)>
+// CHECK: func @empty_reshape_expansion
+// CHECK-SAME: %[[ARG0:.+]]: index
+// CHECK-NEXT: %[[D:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
+// CHECK-NEXT: %[[INIT:.+]] = tensor.empty(%[[D]])
+// CHECK-NEXT: return %[[INIT]]
+
+// -----
+
+// Canonicalization: tensor.collapse_shape of a tensor.empty folds into a
+// single tensor.empty of the collapsed type. The collapsed dynamic size of the
+// group [4, ?, 7] is s0 * 28 (28 = 4 * 7), per the affine map matched below.
+func.func @empty_reshape_collapse(%arg0 : index) -> tensor<6x5x?xf32> {
+  %0 = tensor.empty(%arg0) : tensor<2x3x5x4x?x7xf32>
+  %1 = tensor.collapse_shape %0 [[0, 1], [2], [3, 4, 5]]
+      : tensor<2x3x5x4x?x7xf32> into tensor<6x5x?xf32>
+  return %1 : tensor<6x5x?xf32>
+}
+// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 * 28)>
+// CHECK: func @empty_reshape_collapse
+// CHECK-SAME: %[[ARG0:.+]]: index
+// CHECK-NEXT: %[[D:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
+// CHECK-NEXT: %[[INIT:.+]] = tensor.empty(%[[D]])
+// CHECK-NEXT: return %[[INIT]]
+
+// -----
+
+// Canonicalization: tensor.extract_slice of a tensor.empty folds to a
+// tensor.empty of the slice's type directly (the source allocation carries no
+// data, so only the result shape matters); %arg1 supplies the remaining
+// dynamic size.
+func.func @fold_empty_tensor_with_slice
+  (%arg0 : index, %arg1 : index) -> tensor<5x?x20xf32>
+{
+  %0 = tensor.empty(%arg0) : tensor<?x10x40xf32>
+  %1 = tensor.extract_slice %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1]
+    : tensor<?x10x40xf32> to tensor<5x?x20xf32>
+  return %1 : tensor<5x?x20xf32>
+}
+// CHECK: func @fold_empty_tensor_with_slice
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index
+// CHECK: %[[T0:.+]] = tensor.empty(%[[ARG1]])
+// CHECK: return %[[T0]]
+
+// -----
+
+// Canonicalization: tensor.cast of a tensor.empty to a more static type folds
+// into a static tensor.empty of the cast's result type, dropping the now
+// unneeded dynamic size operand.
+func.func @fold_empty_tensor_with_cast(%arg0 : index) -> tensor<1x12xf32> {
+  %0 = tensor.empty(%arg0) : tensor<?x12xf32>
+  %1 = tensor.cast %0 : tensor<?x12xf32> to tensor<1x12xf32>
+  return %1 : tensor<1x12xf32>
+}
+// CHECK: func @fold_empty_tensor_with_cast(%[[ARG0:.+]]: index)
+// CHECK: %[[T0:.+]] = tensor.empty() : tensor<1x12xf32>
+// CHECK: return %[[T0]] : tensor<1x12xf32>
+
+// -----
+
+// Canonicalization: tensor.dim of a tensor.empty folds away — the dynamic
+// dim resolves to the size operand (%i) and the static dim to a constant
+// (42) — after which the otherwise-unused tensor.empty itself is removed.
+// @some_use keeps the dim results live so the folds are observable.
+func.func private @some_use(%i : index, %j : index)
+
+// CHECK-LABEL: func @empty_tensor_canonicalize
+//  CHECK-SAME:   %[[I:.*]]: index
+func.func @empty_tensor_canonicalize(%i : index) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+
+  // CHECK-NOT: tensor.empty
+  %0 = tensor.empty(%i) : tensor<?x42xf32>
+
+  // CHECK-NOT: tensor.dim
+  %1 = tensor.dim %0, %c0: tensor<?x42xf32>
+  %2 = tensor.dim %0, %c1: tensor<?x42xf32>
+
+  // CHECK: %[[c42:.*]] = arith.constant 42 : index
+  // CHECK: call @some_use(%[[I]], %[[c42]])
+  call @some_use(%1, %2) : (index, index) -> ()
+
+  return
+}
+
+// -----
+
+// Canonicalization: a rank-reducing tensor.extract_slice of a tensor.empty
+// folds to a static tensor.empty of the (rank-reduced) slice type; the
+// dynamic source and the extract disappear entirely.
+// CHECK-LABEL: func @rank_reducing_empty_tensor_extract
+func.func @rank_reducing_empty_tensor_extract(%sz : index, %idx : index) -> tensor<2xf32> {
+  // CHECK: tensor.empty() : tensor<2xf32>
+  %a = tensor.empty(%sz) : tensor<?x2xf32>
+
+  // CHECK-NOT: extract
+  %r = tensor.extract_slice %a[%idx, 0] [1, 2] [1, 1] : tensor<?x2xf32> to tensor<2xf32>
+  return %r: tensor<2xf32>
+}
// CHECK-DAG: %[[c3:.+]] = arith.constant 3 : index
// CHECK-DAG: %[[c5:.+]] = arith.constant 5 : index
// CHECK-DAG: %[[c7:.+]] = arith.constant 7 : index
-// CHECK-DAG: %[[init:.+]] = linalg.init_tensor [20, 11] :
+// CHECK-DAG: %[[init:.+]] = tensor.empty() : tensor<20x11xf32>
// CHECK-DAG: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[c20]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]])
// CHECK: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[iv]] into (%[[c3]], %[[c5]], %[[c7]]
// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] :
// FOREACH-DAG: %[[c3:.+]] = arith.constant 3 : index
// FOREACH-DAG: %[[c5:.+]] = arith.constant 5 : index
// FOREACH-DAG: %[[c7:.+]] = arith.constant 7 : index
-// FOREACH-DAG: %[[init:.+]] = linalg.init_tensor [20, 11] :
+// FOREACH-DAG: %[[init:.+]] = tensor.empty() : tensor<20x11xf32>
// FOREACH: %[[tile:.+]] = scf.foreach_thread (%[[iv:.+]]) in (%[[c20]]) shared_outs(%[[dest:.+]] = %[[init]])
// FOREACH: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[iv]] into (%[[c3]], %[[c5]], %[[c7]]
// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] :
// CHECK-DAG: %[[c3:.+]] = arith.constant 3 : index
// CHECK-DAG: %[[c5:.+]] = arith.constant 5 : index
// CHECK-DAG: %[[c7:.+]] = arith.constant 7 : index
-// CHECK: %[[init:.+]] = linalg.init_tensor [10, 5] :
+// CHECK: %[[init:.+]] = tensor.empty() : tensor<10x5xf32>
// CHECK: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[c10]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]])
// CHECK: %[[inputIv:.+]] = affine.apply #[[$map0]](%[[iv]])
// CHECK: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[inputIv]] into (%[[c3]], %[[c5]], %[[c7]]
// CHECK-DAG: %[[c1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[c2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[c3:.+]] = arith.constant 3 : index
-// CHECK: %[[init:.+]] = linalg.init_tensor [%[[sz]], 5] : tensor<?x5xf32>
+// CHECK: %[[init:.+]] = tensor.empty(%[[sz]]) : tensor<?x5xf32>
// CHECK-DAG: %[[d1:.+]] = tensor.dim %arg0, %[[c1]] : tensor<3x?x?x11xf32>
// CHECK-DAG: %[[d2:.+]] = tensor.dim %arg0, %[[c2]] : tensor<3x?x?x11xf32>
// CHECK: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[sz]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]])
// CHECK-DAG: %[[c3:.+]] = arith.constant 3 : index
// CHECK-DAG: %[[c4:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[c11:.+]] = arith.constant 11 : index
-// CHECK: %[[init:.+]] = linalg.init_tensor [%[[sz1]], %[[sz2]]] : tensor<?x?xf32>
+// CHECK: %[[init:.+]] = tensor.empty(%[[sz1]], %[[sz2]]) : tensor<?x?xf32>
// CHECK-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] :
// CHECK-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] :
// CHECK-DAG: %[[d4:.+]] = tensor.dim %[[arg0]], %[[c4]] :
// FOREACH-DAG: %[[c3:.+]] = arith.constant 3 : index
// FOREACH-DAG: %[[c4:.+]] = arith.constant 4 : index
// FOREACH-DAG: %[[c11:.+]] = arith.constant 11 : index
-// FOREACH: %[[init:.+]] = linalg.init_tensor [%[[sz1]], %[[sz2]]] : tensor<?x?xf32>
+// FOREACH: %[[init:.+]] = tensor.empty(%[[sz1]], %[[sz2]]) : tensor<?x?xf32>
// FOREACH-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] :
// FOREACH-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] :
// FOREACH-DAG: %[[d4:.+]] = tensor.dim %[[arg0]], %[[c4]] :
%collapsed = tensor.collapse_shape %input [[0, 1], [2]] : tensor<30x11x100xf32> into tensor<330x100xf32>
%slice = tensor.extract_slice %collapsed [0, %offt] [330, %size] [1, 1] : tensor<330x100xf32> to tensor<330x?xf32>
// CHECK-NOT: scf.for
- // CHECK: %[[init:.+]] = linalg.init_tensor [330, %[[arg2]]]
+ // CHECK: %[[init:.+]] = tensor.empty(%[[arg2]])
// CHECK: %[[e:.+]] = tensor.extract_slice %[[arg0]][0, 0, %[[arg1]]] [30, 11, %[[arg2]]] [1, 1, 1]
// CHECK: %[[c:.+]] = tensor.collapse_shape %[[e]] {{\[}}[0, 1], [2]]
// CHECK: %[[res:.+]] = tensor.insert_slice %[[c]] into %[[init]]
(tensor<f32>, tensor<4x5x6xf32>, tensor<1x2x3xindex>) -> tensor<1x2x1xf32>
return
}
+
+// -----
+
+// Verifier test: tensor<2x?x?x5xf32> has two dynamic dimensions but only one
+// size operand is supplied, so tensor.empty must be rejected with the
+// diagnostic below. (The expected-error comment must stay immediately above
+// the op: the @+1 offset is line-relative.)
+func.func @empty_wrong_number_of_operands(%sz : index) {
+  // expected-error@+1 {{incorrect number of dynamic sizes, has 1, expected 2}}
+  %out = tensor.empty(%sz) : tensor<2x?x?x5xf32>
+  return
+}
return
}
+// Round-trip test: tensor.empty with one dynamic size operand parses and
+// prints back unchanged.
+// CHECK-LABEL: func @empty(
+//  CHECK-SAME:     %[[sz:.*]]: index
+func.func @empty(%sz: index) -> tensor<5x?x6xf32> {
+  // CHECK: tensor.empty(%[[sz]]) : tensor<5x?x6xf32>
+  %0 = tensor.empty(%sz) : tensor<5x?x6xf32>
+  return %0 : tensor<5x?x6xf32>
+}
+
// CHECK-LABEL: func @extract(
// CHECK-SAME: %[[TENSOR:.*]]: tensor<?x?x?xf32>,
// CHECK-SAME: %[[INDEX:.*]]: index) {
// RUN: mlir-opt %s -test-linalg-transform-patterns=test-linalg-to-vector-patterns \
-// RUN: -linalg-init-tensor-to-alloc-tensor -linalg-bufferize -arith-bufferize \
+// RUN: -empty-tensor-to-alloc-tensor -linalg-bufferize -arith-bufferize \
// RUN: -bufferization-bufferize -tensor-bufferize -func-bufferize \
// RUN: -finalizing-bufferize -buffer-deallocation \
// RUN: -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
from mlir.dialects import func
from mlir.dialects import linalg
from mlir.dialects import sparse_tensor
+from mlir.dialects import tensor
from mlir.dialects.linalg.opdsl import lang
from . import mlir_pytaco_utils as utils
if self.dst_format is None or self.dst_format.rank() == 0:
# Initialize the dense tensor.
ir_type = _mlir_type_from_taco_type(self.dst_dtype)
- tensor = linalg.InitTensorOp(self.dst_dims, ir_type).result
+ empty = tensor.EmptyOp(self.dst_dims, ir_type).result
zero = arith.ConstantOp(ir_type, 0.0)
- return linalg.fill(zero, outs=[tensor])
+ return linalg.fill(zero, outs=[empty])
# Initialize the sparse tensor.
mlir_type = _mlir_tensor_type(self.dst_dtype, self.dst_dims,
"""
if array.dtype != np.float32 and array.dtype != np.float64:
raise ValueError(f"Expected floating point value type: {array.dtype}.")
- tensor = Tensor(
+ t = Tensor(
array.shape,
dtype=_nptype_to_taco_type(array.dtype.type),
is_dense=True)
- tensor._dense_storage = np.copy(array)
- return tensor
+ t._dense_storage = np.copy(array)
+ return t
@staticmethod
def from_coo(
# The size of each dimension is one more that such a maximum coordinate
# value.
shape = [c + 1 for c in max_coordinate]
- tensor = Tensor(shape, fmt, dtype=dtype)
- tensor._coords = coordinates
- tensor._values = values
+ t = Tensor(shape, fmt, dtype=dtype)
+ t._coords = coordinates
+ t._values = values
-    return tensor
+    return t
sparse_tensor, shape = utils.create_sparse_tensor(filename,
fmt.format_pack.formats,
_dtype_to_mlir_str(dtype))
- tensor = Tensor(shape.tolist(), fmt, dtype=dtype)
- tensor._set_packed_sparse_tensor(sparse_tensor)
+ t = Tensor(shape.tolist(), fmt, dtype=dtype)
+ t._set_packed_sparse_tensor(sparse_tensor)
- return tensor
+ return t
def to_file(self, filename: str) -> None:
"""Output the tensor value to a file.
%cst = arith.constant 0.0 : f32
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
- %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
%gemm = linalg.matmul {__internal_linalg_transform__ = "fusion"}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK: func.func @gemm_fill_fusion(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
-// CHECK: %[[INIT:.+]] = linalg.init_tensor
+// CHECK: %[[INIT:.+]] = tensor.empty
// CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =
// CHECK-SAME: iter_args(%[[ITERARG0:.+]] = %[[INIT]])
// CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =
%cst = arith.constant 0.0 : f32
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
- %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
%gemm = linalg.matmul
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>,
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?xf32>)
-// CHECK: %[[INIT:.+]] = linalg.init_tensor
+// CHECK: %[[INIT:.+]] = tensor.empty
// CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =
// CHECK-SAME: iter_args(%[[ITERARG0:.+]] = %[[INIT]])
// CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =
%cst = arith.constant 0.0 : f32
%d0 = tensor.dim %lhs0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %rhs0, %c1 : tensor<?x?xf32>
- %init0 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %init0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%fill0 = linalg.fill ins(%cst : f32) outs(%init0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%gemm0 = linalg.matmul
ins(%lhs0, %rhs0 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%fill0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%d2 = tensor.dim %rhs1, %c1 : tensor<?x?xf32>
- %init1 = linalg.init_tensor [%d0, %d2] : tensor<?x?xf32>
+ %init1 = tensor.empty(%d0, %d2) : tensor<?x?xf32>
%fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor<?x?xf32>) -> tensor<?x?xf32>
%gemm1 = linalg.matmul {__internal_linalg_transform__ = "gemm_fusion"}
ins(%gemm0, %rhs1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%fill1 : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[LHS0]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[RHS0]], %[[C1]]
-// CHECK-DAG: %[[INIT0:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
+// CHECK-DAG: %[[INIT0:.+]] = tensor.empty(%[[D0]], %[[D1]])
// CHECK-DAG: %[[D2:.+]] = tensor.dim %[[RHS1]], %[[C1]]
-// CHECK: %[[INIT1:.+]] = linalg.init_tensor [%[[D0]], %[[D2]]]
+// CHECK: %[[INIT1:.+]] = tensor.empty(%[[D0]], %[[D2]])
// CHECK: scf.for %[[IV:[a-zA-Z0-9]+]] =
// CHECK-SAME: iter_args(%[[ITERARG:.+]] = %[[INIT1]])
// CHECK-DAG: %[[LHS0_TILE:.+]] = tensor.extract_slice %[[LHS0]][%[[IV]], 0]
%cst = arith.constant 0.0 : f32
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
- %init0 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %init0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%gemm = linalg.matmul
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%fill : tensor<?x?xf32>) -> tensor<?x?xf32>
- %init1 = linalg.init_tensor [%d1, %d0] : tensor<?x?xf32>
+ %init1 = tensor.empty(%d1, %d0) : tensor<?x?xf32>
%transpose = linalg.generic {
__internal_linalg_transform__ = "fusion",
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG1]], %[[C1]]
-// CHECK-DAG: %[[INIT0:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
-// CHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [%[[D1]], %[[D0]]]
+// CHECK-DAG: %[[INIT0:.+]] = tensor.empty(%[[D0]], %[[D1]])
+// CHECK-DAG: %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
// CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =
// CHECK-SAME: iter_args(%[[ITERARG0:.+]] = %[[INIT1]])
// CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
%cst = arith.constant 0.0 : f32
- %0 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
+ %0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%2 = linalg.matmul
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK: func.func @interchange_matmul_fusion(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
-// CHECK: %[[INIT:.+]] = linalg.init_tensor
+// CHECK: %[[INIT:.+]] = tensor.empty
// CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =
// CHECK-SAME: iter_args(%[[ITERARG0:.+]] = %[[INIT]])
// CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%3 = tensor.dim %2, %c0 : tensor<?x?xf32>
%4 = tensor.dim %2, %c1 : tensor<?x?xf32>
- %5 = linalg.init_tensor [%3, %4] : tensor<?x?xf32>
+ %5 = tensor.empty(%3, %4) : tensor<?x?xf32>
%6 = linalg.generic
{indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, d1)>,
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%3 = tensor.dim %2, %c0 : tensor<?x?xf32>
%4 = tensor.dim %2, %c1 : tensor<?x?xf32>
- %5 = linalg.init_tensor [%3, %4] : tensor<?x?xf32>
+ %5 = tensor.empty(%3, %4) : tensor<?x?xf32>
%6 = linalg.generic
{indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d1, d0)>,
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
func.func @multi_result(%arg0 : tensor<128x200x300xf32>) -> (tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
- %init0 = linalg.init_tensor [128, 300, 200] : tensor<128x300x200xf32>
- %init1 = linalg.init_tensor [300, 128, 200] : tensor<300x128x200xf32>
+ %init0 = tensor.empty() : tensor<128x300x200xf32>
+ %init1 = tensor.empty() : tensor<300x128x200xf32>
%0:2 = linalg.generic {
indexing_maps = [#map0, #map1, #map2],
iterator_types = ["parallel", "parallel", "parallel"]}
// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index
// CHECK-DAG: %[[C128:.+]] = arith.constant 128 : index
// CHECK-DAG: %[[C300:.+]] = arith.constant 300 : index
-// CHECK-DAG: %[[INIT0:.+]] = linalg.init_tensor [128, 300, 200]
-// CHECK-DAG: %[[INIT1:.+]] = linalg.init_tensor [300, 128, 200]
+// CHECK-DAG: %[[INIT0:.+]] = tensor.empty()
+// CHECK-DAG: %[[INIT1:.+]] = tensor.empty()
// CHECK: %[[OUTER:[a-zA-Z0-9]+]]:2 = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[C128]] step %[[C10]]
// CHECK-SAME: iter_args(%[[ARG1:[a-zA-Z0-9]+]] = %[[INIT0]], %[[ARG2:[a-zA-Z0-9]+]] = %[[INIT1]])
// CHECK: %[[TS_Y:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[C128]]]
// Create the destination tensor using the above values.
Type elementType = op.getSourceType().getElementType();
SmallVector<OpFoldResult> outputShape = getAsOpFoldResult(reifiedShapes[0]);
- Value dest = rewriter.create<linalg::InitTensorOp>(
- op->getLoc(), outputShape, elementType);
+ Value dest = rewriter.create<tensor::EmptyOp>(op->getLoc(), outputShape,
+ elementType);
// Calculate the parameters for the tile loop nest.
FailureOr<tensor::ExtractSliceFromCollapseHelper> params =
from mlir.dialects import builtin
from mlir.dialects import func
from mlir.dialects import linalg
+from mlir.dialects import tensor
from mlir.dialects.linalg.opdsl.lang import *
# CHECK-LABEL: func @test_matmul_mono
# CHECK-SAME: %[[A:.+]]: tensor<4x16xf32>
# CHECK-SAME: %[[B:.+]]: tensor<16x8xf32>
- # CHECK: %[[INITC:.+]] = linalg.init_tensor [4, 8] : tensor<4x8xf32>
+ # CHECK: %[[INITC:.+]] = tensor.empty() : tensor<4x8xf32>
# CHECK: linalg.generic
# CHECK-SAME: indexing_maps = [#[[$MUL_MAP_A]], #[[$MUL_MAP_B]], #[[$MUL_MAP_C]]]
# CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
@func.FuncOp.from_py_func(
RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8), f32))
def test_matmul_mono(lhs, rhs):
- init_result = linalg.InitTensorOp([4, 8], f32)
+ init_result = tensor.EmptyOp([4, 8], f32)
return matmul_mono(lhs, rhs, outs=[init_result.result])
# CHECK-LABEL: @test_i8i8i32_matmul
# RUN: %PYTHON %s | FileCheck %s
-from mlir.dialects import arith, builtin, func, linalg
+from mlir.dialects import arith, builtin, func, linalg, tensor
from mlir.dialects.linalg.opdsl.lang import *
from mlir.ir import *
return f
-# CHECK-LABEL: TEST: testInitTensor
-@run
-def testInitTensor():
- with Context() as ctx, Location.unknown():
- module = Module.create()
- f32 = F32Type.get()
- with InsertionPoint(module.body):
- # CHECK-LABEL: func @static_sizes
- # CHECK: %0 = linalg.init_tensor [3, 4] : tensor<3x4xf32>
- @func.FuncOp.from_py_func()
- def static_sizes():
- return linalg.InitTensorOp([3, 4], f32)
-
- # CHECK-LABEL: func @dynamic_sizes
- # CHECK: %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
- @func.FuncOp.from_py_func(IndexType.get(), IndexType.get())
- def dynamic_sizes(d0, d1):
- return linalg.InitTensorOp([d0, d1], f32)
-
- # CHECK-LABEL: func @zero_d
- # CHECK: %0 = linalg.init_tensor [] : tensor<f32>
- @func.FuncOp.from_py_func()
- def zero_d():
- return linalg.InitTensorOp([], f32)
-
- print(module)
-
-
-# CHECK-LABEL: TEST: testInitTensorStaticSizesAttribute
-@run
-def testInitTensorStaticSizesAttribute():
- with Context() as ctx, Location.unknown():
- module = Module.create()
- f32 = F32Type.get()
- with InsertionPoint(module.body):
- op = linalg.InitTensorOp([3, 4], f32)
- # CHECK: [3, 4]
- print(op.attributes["static_sizes"])
-
-
# CHECK-LABEL: TEST: testFill
@run
def testFill():
@func.FuncOp.from_py_func(
RankedTensorType.get((4, 8), f32), RankedTensorType.get((4, 8), f32))
def named_form(lhs, rhs):
- init_result = linalg.InitTensorOp([4, 8], f32)
+ init_result = tensor.EmptyOp([4, 8], f32)
# Check for the named form with custom format
# CHECK: linalg.elemwise_unary
# CHECK-SAME: cast = #linalg.type_fn<cast_signed>
RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8),
f32))
def named_form(lhs, rhs):
- init_result = linalg.InitTensorOp([4, 8], f32)
+ init_result = tensor.EmptyOp([4, 8], f32)
# CHECK: "linalg.matmul"(%{{.*}})
# CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32):
# CHECK-NEXT: arith.mulf{{.*}} (f32, f32) -> f32
RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8),
f32))
def generic_form(lhs, rhs):
- init_result = linalg.InitTensorOp([4, 8], f32)
+ init_result = tensor.EmptyOp([4, 8], f32)
# CHECK: linalg.generic
return linalg.matmul(
lhs, rhs, outs=[init_result.result], emit_generic=True)
lhs = linalg.fill(one, outs=[arg0])
# CHECK: %[[RHS:.*]] = linalg.fill
rhs = linalg.fill(one, outs=[arg1])
- # CHECK: %[[INIT:.*]] = linalg.init_tensor
- init = linalg.InitTensorOp([4, 8], f32)
+ # CHECK: %[[INIT:.*]] = tensor.empty
+ init = tensor.EmptyOp([4, 8], f32)
# CHECK: linalg.matmul
# CHECK: ins(%[[LHS]], %[[RHS]]
# CHECK: outs(%[[INIT]]
return [d0.result, d1.result]
print(module)
+
+
+# CHECK-LABEL: TEST: testEmptyOp
+@run
+def testEmptyOp():
+ with Context() as ctx, Location.unknown():
+ module = Module.create()
+ f32 = F32Type.get()
+ with InsertionPoint(module.body):
+ # CHECK-LABEL: func @static_sizes
+ # CHECK: %0 = tensor.empty() : tensor<3x4xf32>
+ @func.FuncOp.from_py_func()
+ def static_sizes():
+ return tensor.EmptyOp([3, 4], f32)
+
+ # CHECK-LABEL: func @dynamic_sizes
+ # CHECK: %0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
+ @func.FuncOp.from_py_func(IndexType.get(), IndexType.get())
+ def dynamic_sizes(d0, d1):
+ return tensor.EmptyOp([d0, d1], f32)
+
+ # CHECK-LABEL: func @mixed_static_dynamic_sizes
+ # CHECK: %0 = tensor.empty(%arg0) : tensor<?x4xf32>
+ @func.FuncOp.from_py_func(IndexType.get())
+ def mixed_static_dynamic_sizes(d0):
+ return tensor.EmptyOp([d0, 4], f32)
+
+ # CHECK-LABEL: func @zero_d
+ # CHECK: %0 = tensor.empty() : tensor<f32>
+ @func.FuncOp.from_py_func()
+ def zero_d():
+ return tensor.EmptyOp([], f32)
+
+ print(module)
hdrs = ["include/mlir/Dialect/Tensor/IR/Tensor.h"],
includes = ["include"],
deps = [
+ ":AffineDialect",
":ArithDialect",
":ArithUtils",
":CastOpInterfaces",