return success();
}
-// Specialization for `linalg::GenericOp`.
+/// Specialization for `linalg::GenericOp` and `linalg::IndexedGenericOp`,
+/// dispatched via the `GenericOpTy` template parameter.
/// A pattern to convert Generic Linalg operations which work on tensors to
/// use buffers. BufferPlacement pass should be later used to move
/// Alloc operations to the correct positions and insert the missing Dealloc
/// operations in the correct places.
-static void finalizeBufferAllocation(ConversionPatternRewriter &rewriter,
- linalg::GenericOp genericOp,
- ValueRange inputs, ValueRange outputs) {
+template <typename GenericOpTy>
+static void
+finalizeBufferAllocationForGenericOp(ConversionPatternRewriter &rewriter,
+ GenericOpTy genericOp, ValueRange inputs,
+ ValueRange outputs) {
// Generate a new linalg operation that works on buffers.
- auto newGenericOp = rewriter.create<linalg::GenericOp>(
+ auto newGenericOp = rewriter.create<GenericOpTy>(
genericOp.getLoc(),
/*resultTensorTypes=*/llvm::None,
/*inputs=*/inputs,
rewriter.replaceOp(genericOp, outputs);
}
-// TODO: Specialization for `linalg::IndexedGenericOp`.
-
-// Specialization for all other `linalg::LinalgOp`.
+/// Specialization for all other `linalg::LinalgOp`.
static void finalizeBufferAllocation(ConversionPatternRewriter &rewriter,
linalg::LinalgOp linalgOp,
ValueRange inputs, ValueRange outputs) {
// Delegate to the linalg generic pattern.
if (auto genericOp = dyn_cast<linalg::GenericOp>(op)) {
- finalizeBufferAllocation(rewriter, genericOp, adaptor.inputs(),
- newOutputBuffers);
+ finalizeBufferAllocationForGenericOp<GenericOp>(
+ rewriter, genericOp, adaptor.inputs(), newOutputBuffers);
+ return success();
+ }
+
+  // Delegate to the linalg.indexed_generic pattern.
+ if (auto genericOp = dyn_cast<linalg::IndexedGenericOp>(op)) {
+ finalizeBufferAllocationForGenericOp<IndexedGenericOp>(
+ rewriter, genericOp, adaptor.inputs(), newOutputBuffers);
return success();
}
// CHECK: linalg.generic
// CHECK-SAME: ins(%{{.*}} : memref<4xf32>)
// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>)
+// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32):
func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
%0, %1 = linalg.generic {
indexing_maps = [#map0, #map0, #map0],
// -----
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @multiple_results_indexed
+// CHECK: %[[RESULT0:.*]] = alloc() : memref<4xi32>
+// CHECK: %[[RESULT1:.*]] = alloc() : memref<4xi32>
+// CHECK: linalg.indexed_generic
+// CHECK-SAME: ins(%{{.*}} : memref<4xi32>)
+// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref<4xi32>, memref<4xi32>)
+// CHECK-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32):
+func @multiple_results_indexed(%arg0: tensor<4xi32>)
+ -> (tensor<4xi32>, tensor<4xi32>) {
+ %0, %1 = linalg.indexed_generic {
+ indexing_maps = [#map0, #map0, #map0],
+ iterator_types = ["parallel"]
+ } ins(%arg0 : tensor<4xi32>) {
+ ^bb0(%i: index, %gen_arg1: i32):
+ %i_i32 = index_cast %i : index to i32
+ %tmp1 = addi %gen_arg1, %i_i32 : i32
+ linalg.yield %tmp1, %tmp1 : i32, i32
+ } -> tensor<4xi32>, tensor<4xi32>
+ return %0, %1 : tensor<4xi32>, tensor<4xi32>
+}
+
+// -----
+
#map_2d = affine_map<(d0, d1) -> (d0, d1)>
#map_2d_inv = affine_map<(d0, d1) -> (d1, d0)>