+++ /dev/null
-// RUN: mlir-opt -test-buffer-placement-preparation-with-allowed-memref-results -split-input-file %s | FileCheck %s
-
-// Since allowMemrefEscaping is on for Buffer Placement in this test pass, all
-// tensor typed function results are converted to memref and remain as function
-// results. All memref typed function results will escape from the deallocation
-// phase of Buffer Placement.
-
-// CHECK-LABEL: func @void_function_signature_conversion
-func @void_function_signature_conversion(%arg0: tensor<4x8xf32>) {
- return
-}
-// CHECK: ({{.*}}: memref<4x8xf32>)
-
-// -----
-
-#map0 = affine_map<(d0) -> (d0)>
-
-// CHECK-LABEL: func @complex_signature_conversion
-func @complex_signature_conversion(%arg0: tensor<5xf32>, %arg1: memref<10xf32>, %arg2: i1, %arg3: f16) -> (i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16) {
- %0 = alloc() : memref<15xf32>
- %1 = linalg.generic {
- indexing_maps = [#map0, #map0],
- iterator_types = ["parallel"]}
- ins(%arg0 : tensor<5xf32>) {
- ^bb0(%gen1_arg0: f32):
- %tmp1 = exp %gen1_arg0 : f32
- linalg.yield %tmp1 : f32
- } -> tensor<5xf32>
- return %arg2, %1, %arg1, %0, %arg3 : i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16
-}
-// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, %[[ARG2:.*]]: i1, %[[ARG3:.*]]: f16)
-// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, memref<15xf32>, f16)
-// CHECK: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK: %[[LINALG_ALLOC:.*]] = alloc()
-// CHECK: return %[[ARG2]], %[[LINALG_ALLOC]], %[[ARG1]], %[[FIRST_ALLOC]], %[[ARG3]]
-
-// -----
-
-// CHECK-LABEL: func @no_signature_conversion_is_needed
-func @no_signature_conversion_is_needed(%arg0: memref<4x8xf32>) {
- return
-}
-// CHECK: ({{.*}}: memref<4x8xf32>)
-
-// -----
-
-// CHECK-LABEL: func @no_signature_conversion_is_needed
-func @no_signature_conversion_is_needed(%arg0: i1, %arg1: f16) -> (i1, f16){
- return %arg0, %arg1 : i1, f16
-}
-// CHECK: (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: f16) -> (i1, f16)
-// CHECK: return %[[ARG0]], %[[ARG1]]
-
-// -----
-
-// CHECK-LABEL: func @simple_signature_conversion
-func @simple_signature_conversion(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> {
- return %arg0 : tensor<4x8xf32>
-}
-// CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>) -> [[TYPE]]<[[RANK]]>
-// CHECK-NEXT: return %[[ARG0]]
-
-// -----
-
-// CHECK-LABEL: func @func_with_unranked_arg_and_result
-func @func_with_unranked_arg_and_result(%arg0: tensor<*xf32>) -> tensor<*xf32> {
- return %arg0 : tensor<*xf32>
-}
-// CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>) -> memref<*xf32>
-// CHECK-NEXT: return [[ARG]] : memref<*xf32>
-
-// -----
-
-// CHECK-LABEL: func @func_and_block_signature_conversion
-func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{
- cond_br %cond, ^bb1, ^bb2
- ^bb1:
- br ^exit(%arg0 : tensor<2xf32>)
- ^bb2:
- br ^exit(%arg0 : tensor<2xf32>)
- ^exit(%arg2: tensor<2xf32>):
- return %arg1 : tensor<4x4xf32>
-}
-// CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]]) -> [[RESULT_TYPE:.*]]
-// CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]])
-// CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]])
-// CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]])
-// CHECK-NEXT: return %[[ARG1]]
-
-// -----
-
-// CHECK-LABEL: func @callee
-func @callee(%arg1: tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) {
- %buff = alloc() : memref<2xf32>
- return %arg1, %buff : tensor<5xf32>, memref<2xf32>
-}
-// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>) -> (memref<5xf32>, memref<2xf32>)
-// CHECK: %[[ALLOC:.*]] = alloc()
-// CHECK: return %[[CALLEE_ARG]], %[[ALLOC]]
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
- %x:2 = call @callee(%arg0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
- %y:2 = call @callee(%x#0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
- return %y#0 : tensor<5xf32>
-}
-// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>) -> memref<5xf32>
-// CHECK: %[[X:.*]]:2 = call @callee(%[[CALLER_ARG]])
-// CHECK: %[[Y:.*]]:2 = call @callee(%[[X]]#0)
-// CHECK: return %[[Y]]#0
-
-// -----
-
-// Test case: Testing BufferAssignmentCallOpConverter to see if it matches with the
-// signature of the new signature of the callee function when there are tuple typed
-// args and results. BufferAssignmentTypeConverter is set to flatten tuple typed
-// arguments. The tuple typed values should be decomposed and composed using
-// get_tuple_element and make_tuple operations of test dialect. Tensor types are
-// converted to Memref. Memref typed function results remain as function results.
-
-// CHECK-LABEL: func @callee
-func @callee(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>){
- return %arg0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
-}
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> tuple<tensor<2xf32>,i1, tensor<5xf32>>{
- %x0 = call @callee(%arg0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
- %y0 = call @callee(%x0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
- return %y0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
-}
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[ARG_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[RESULT_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[RETURN_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]
-
-// -----
-
-// Test case: Testing BufferAssignmentFuncOpConverter and
-// BufferAssignmentReturnOpConverter to see if the return operation matches with
-// the new function signature when there are tuple typed args and results.
-// BufferAssignmentTypeConverter is set to flatten tuple typed arguments. The tuple
-// typed values should be decomposed and composed using get_tuple_element and
-// make_tuple operations of test dialect. Tensor types are converted to Memref.
-// Memref typed function results remain as function results.
-
-// CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results
-func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>, %arg1: tensor<10xf32>, %arg2: tuple<i1, tensor<5xf32>>) -> (tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>){
- return %arg2, %arg1, %arg0 : tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>
-}
-// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>
-// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, i1, f32)
-// CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]])
-// CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]])
-// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[SECOND_TUPLE_SECOND_ELEM]], %[[ARG2]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]]
+++ /dev/null
-// RUN: mlir-opt -test-buffer-placement-preparation -split-input-file %s | FileCheck %s
-
-// CHECK-LABEL: func @func_signature_conversion
-func @func_signature_conversion(%arg0: tensor<4x8xf32>) {
- return
-}
-// CHECK: ({{.*}}: memref<4x8xf32>) {
-
-// -----
-
-// Only tensor typed function result should be converted to memref and move to the
-// function arguments list. The other memref function results remain as function
-// results.
-
-#map0 = affine_map<(d0) -> (d0)>
-
-// CHECK-LABEL: func @memref_in_function_results
-func @memref_in_function_results(%arg0: tensor<5xf32>, %arg1: memref<10xf32>) -> (tensor<5xf32>, memref<10xf32>, memref<15xf32>) {
- %0 = alloc() : memref<15xf32>
- %1 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%arg0 : tensor<5xf32>) {
- ^bb0(%gen1_arg0: f32):
- %tmp1 = exp %gen1_arg0 : f32
- linalg.yield %tmp1 : f32
- } -> tensor<5xf32>
- return %1, %arg1, %0 : tensor<5xf32>, memref<10xf32>, memref<15xf32>
-}
-// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, %[[RESULT:.*]]: memref<5xf32>)
-// CHECK-SAME: (memref<10xf32>, memref<15xf32>)
-// CHECK: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK: %[[LINALG_ALLOC:.*]] = alloc()
-// CHECK: linalg.copy(%[[LINALG_ALLOC]], %[[RESULT]])
-// CHECK: return %[[ARG1]], %[[FIRST_ALLOC]]
-
-// -----
-
-// CHECK-LABEL: func @no_signature_conversion_is_needed
-func @no_signature_conversion_is_needed(%arg0: memref<4x8xf32>) {
- return
-}
-// CHECK: ({{.*}}: memref<4x8xf32>) {
-
-// -----
-
-// CHECK-LABEL: func @no_signature_conversion_is_needed
-func @no_signature_conversion_is_needed(%arg0: i1, %arg1: f16) -> (i1, f16){
- return %arg0, %arg1 : i1, f16
-}
-// CHECK: (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: f16) -> (i1, f16)
-// CHECK: return %[[ARG0]], %[[ARG1]]
-
-// -----
-
-// CHECK-LABEL: func @complex_signature_conversion
-func @complex_signature_conversion(%arg0: tensor<4x8xf32>, %arg1: i1, %arg2: tensor<5x5xf64>,%arg3: f16) -> (i1, tensor<5x5xf64>, f16, tensor<4x8xf32>) {
- return %arg1, %arg2, %arg3, %arg0 : i1, tensor<5x5xf64>, f16, tensor<4x8xf32>
-}
-// CHECK: (%[[ARG0:.*]]: memref<4x8xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5x5xf64>, %[[ARG3:.*]]: f16,
-// CHECK-SAME: %[[RESULT1:.*]]: memref<5x5xf64>, %[[RESULT2:.*]]: memref<4x8xf32>) -> (i1, f16) {
-// CHECK-NEXT: linalg.copy(%[[ARG2]], %[[RESULT1]])
-// CHECK-NEXT: linalg.copy(%[[ARG0]], %[[RESULT2]])
-// CHECK-NEXT: return %[[ARG1]], %[[ARG3]]
-
-// -----
-
-// CHECK-LABEL: func @non_void_to_void_return_op_converter
-func @non_void_to_void_return_op_converter(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> {
- return %arg0 : tensor<4x8xf32>
-}
-// CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>, %[[RESULT:.*]]: [[TYPE]]<[[RANK]]>) {
-// CHECK-NEXT: linalg.copy(%[[ARG0]], %[[RESULT]])
-// CHECK-NEXT: return
-
-// -----
-
-// CHECK-LABEL: func @func_and_block_signature_conversion
-func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{
- cond_br %cond, ^bb1, ^bb2
- ^bb1:
- br ^exit(%arg0 : tensor<2xf32>)
- ^bb2:
- br ^exit(%arg0 : tensor<2xf32>)
- ^exit(%arg2: tensor<2xf32>):
- return %arg1 : tensor<4x4xf32>
-}
-// CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]], %[[RESULT:.*]]: [[RESULT_TYPE:.*]]) {
-// CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]])
-// CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]])
-// CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]])
-// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[RESULT]])
-// CHECK-NEXT: return
-
-// -----
-
-// Test Case: Simple case for checking if BufferizePlacer creates AllocOps right before GenericOps.
-
-#map0 = affine_map<(d0) -> (d0)>
-
-// CHECK-LABEL: func @compute_allocs_position_simple
-func @compute_allocs_position_simple(%cond: i1, %arg0: tensor<2xf32>) -> tensor<2xf32>{
- %0 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%arg0 : tensor<2xf32>) {
- ^bb0(%gen1_arg0: f32):
- %tmp1 = exp %gen1_arg0 : f32
- linalg.yield %tmp1 : f32
- } -> tensor<2xf32>
- %1 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%0 : tensor<2xf32>) {
- ^bb0(%gen2_arg0: f32):
- %tmp2 = exp %gen2_arg0 : f32
- linalg.yield %tmp2 : f32
- } -> tensor<2xf32>
- return %1 : tensor<2xf32>
-}
-// CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>,
-// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}} outs(%[[FIRST_ALLOC]]
-// CHECK: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK-NEXT: linalg.generic {{.*}} ins(%[[FIRST_ALLOC]]{{.*}} outs(%[[SECOND_ALLOC]]
-
-// -----
-
-// Test Case: if-else case for checking if BufferizePlacer creates AllocOps right before GenericOps.
-
-#map0 = affine_map<(d0) -> (d0)>
-
-// CHECK-LABEL: func @compute_allocs_position
-func @compute_allocs_position(%cond: i1, %arg0: tensor<2xf32>) -> tensor<2xf32>{
- %0 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%arg0 : tensor<2xf32>) {
- ^bb0(%gen1_arg0: f32):
- %tmp1 = exp %gen1_arg0 : f32
- linalg.yield %tmp1 : f32
- } -> tensor<2xf32>
- %1 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%0 : tensor<2xf32>) {
- ^bb0(%gen2_arg0: f32):
- %tmp2 = exp %gen2_arg0 : f32
- linalg.yield %tmp2 : f32
- } -> tensor<2xf32>
- cond_br %cond, ^bb1(%arg0, %0: tensor<2xf32>, tensor<2xf32>),
- ^bb2(%0, %arg0: tensor<2xf32>, tensor<2xf32>)
- ^bb1(%arg1 : tensor<2xf32>, %arg2 : tensor<2xf32>):
- %2 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%arg0 : tensor<2xf32>) {
- ^bb0(%gen3_arg0: f32):
- %tmp3 = exp %gen3_arg0 : f32
- linalg.yield %tmp3 : f32
- } -> tensor<2xf32>
- %3 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%2 : tensor<2xf32>) {
- ^bb0(%gen4_arg0: f32):
- %tmp4 = exp %gen4_arg0 : f32
- linalg.yield %tmp4 : f32
- } -> tensor<2xf32>
- br ^exit(%arg1, %arg2 : tensor<2xf32>, tensor<2xf32>)
- ^bb2(%arg3 : tensor<2xf32>, %arg4 : tensor<2xf32>):
- %4 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%arg0 : tensor<2xf32>) {
- ^bb0(%gen5_arg0: f32):
- %tmp5 = exp %gen5_arg0 : f32
- linalg.yield %tmp5 : f32
- } -> tensor<2xf32>
- %5 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%4 : tensor<2xf32>) {
- ^bb0(%gen6_arg0: f32):
- %tmp6 = exp %gen6_arg0 : f32
- linalg.yield %tmp6 : f32
- } -> tensor<2xf32>
- br ^exit(%arg3, %arg4 : tensor<2xf32>, tensor<2xf32>)
- ^exit(%arg5 : tensor<2xf32>, %arg6 : tensor<2xf32>):
- %6 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%arg0 : tensor<2xf32>) {
- ^bb0(%gen7_arg0: f32):
- %tmp7 = exp %gen7_arg0 : f32
- linalg.yield %tmp7 : f32
- } -> tensor<2xf32>
- %7 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%6 : tensor<2xf32>) {
- ^bb0(%gen8_arg0: f32):
- %tmp8 = exp %gen8_arg0 : f32
- linalg.yield %tmp8 : f32
- } -> tensor<2xf32>
- return %7 : tensor<2xf32>
-}
-// CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>,
-// CHECK-NEXT: %[[ALLOC0:.*]] = alloc()
-// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}} outs(%[[ALLOC0]]
-// CHECK: %[[ALLOC1:.*]] = alloc()
-// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ALLOC0]]{{.*}} outs(%[[ALLOC1]]
-// CHECK: cond_br %{{.*}}, ^[[BB0:.*]]({{.*}}), ^[[BB1:.*]](
-// CHECK-NEXT: ^[[BB0]]
-// CHECK-NEXT: %[[ALLOC2:.*]] = alloc()
-// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}} outs(%[[ALLOC2]]
-// CHECK: %[[ALLOC3:.*]] = alloc()
-// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ALLOC2]]{{.*}} outs(%[[ALLOC3]]
-// CHECK: br ^[[EXIT:.*]]({{.*}})
-// CHECK-NEXT: ^[[BB1]]
-// CHECK-NEXT: %[[ALLOC4:.*]] = alloc()
-// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}} outs(%[[ALLOC4]]
-// CHECK: %[[ALLOC5:.*]] = alloc()
-// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ALLOC4]]{{.*}} outs(%[[ALLOC5]]
-// CHECK: br ^[[EXIT]]
-// CHECK-NEXT: ^[[EXIT]]
-// CHECK-NEXT: %[[ALLOC6:.*]] = alloc()
-// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ARG0]]{{.*}} outs(%[[ALLOC6]]
-// CHECK: %[[ALLOC7:.*]] = alloc()
-// CHECK-NEXT: linalg.generic {{.*}} ins(%[[ALLOC6]]{{.*}} outs(%[[ALLOC7]]
-
-// -----
-
-// Test case: Checking BufferizeCallOpConverter and
-// BufferizeFuncOpConverter and BufferizeReturnOpConverter all
-// together. The signature of `callee` after signature conversion would be:
-
-// func @callee(%arg0: memref<5xf32>,%arg1: memref<5xf32>) -> ()
-
-// The operands and results of caller and return operations must be matched
-// respectively.
-
-#map0 = affine_map<(d0) -> (d0)>
-
-// CHECK-LABEL: func @callee
-func @callee(%arg1: tensor<5xf32>) -> tensor<5xf32> {
- %0 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
- ins(%arg1 : tensor<5xf32>) {
- ^bb0(%gen1_arg0: f32):
- %tmp1 = exp %gen1_arg0 : f32
- linalg.yield %tmp1 : f32
- } -> tensor<5xf32>
- return %0 : tensor<5xf32>
-}
-// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>, %[[CALLEE_RESULT:.*]]: memref<5xf32>)
-// CHECK: %[[ALLOC:.*]] = alloc()
-// CHECK: linalg.generic
-// CHECK: linalg.copy(%[[ALLOC]], %[[CALLEE_RESULT]])
-// CHECK: return
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
- %x = call @callee(%arg0) : (tensor<5xf32>) -> tensor<5xf32>
- %y = call @callee(%x) : (tensor<5xf32>) -> tensor<5xf32>
- return %y : tensor<5xf32>
-}
-// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>, %[[CALLER_RESULT:.*]]: memref<5xf32>)
-// CHECK: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK: call @callee(%[[CALLER_ARG]], %[[FIRST_ALLOC]])
-// CHECK: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK: call @callee(%[[FIRST_ALLOC]], %[[SECOND_ALLOC]])
-// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[CALLER_RESULT]])
-// CHECK: return
-
-// -----
-
-// Test case: Checking BufferizeCallOpConverter and
-// BufferizeFuncOpConverter and BufferizeReturnOpConverter all
-// together on functions that also have memref typed results. The signature of
-// `callee` after signature conversion would be:
-
-// func @callee(%arg0: memref<5xf32>,%arg1: memref<5xf32>)-> memref<2xf32>
-
-// where %arg0 is the input and %arg1 is the output buffer and the original memref
-// type result remain as the function result. Then, the rewriter should match the
-// caller's signature with the callee. Thus, two buffers will be allocated instead
-// of %x0 and %y0 and they are passed to the callers' operands list as the output
-// buffers. %x1 and %y1 remain as callers' results.
-
-
-// CHECK-LABEL: func @callee
-func @callee(%arg1: tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) {
- %buff = alloc() : memref<2xf32>
- return %arg1, %buff : tensor<5xf32>, memref<2xf32>
-}
-// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>, %[[CALLEE_RESULT:.*]]: memref<5xf32>)
-// CHECK-SAME: memref<2xf32>
-// CHECK: %[[ALLOC:.*]] = alloc()
-// CHECK: linalg.copy(%[[CALLEE_ARG]], %[[CALLEE_RESULT]])
-// CHECK: return %[[ALLOC]]
-
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
- %x0, %x1 = call @callee(%arg0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
- %y0, %y1 = call @callee(%x0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
- return %y0 : tensor<5xf32>
-}
-// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>, %[[CALLER_RESULT:.*]]: memref<5xf32>)
-// CHECK: %[[X0:.*]] = alloc()
-// CHECK: %[[X1:.*]] = call @callee(%[[CALLER_ARG]], %[[X0]])
-// CHECK: %[[Y0:.*]] = alloc()
-// CHECK: %[[Y1:.*]] = call @callee(%[[X0]], %[[Y0]])
-// CHECK: linalg.copy(%[[Y0]], %[[CALLER_RESULT]])
-// CHECK: return
-
-// -----
-
-// CHECK-LABEL: func @func_with_unranked_arg
-func @func_with_unranked_arg(%arg0: tensor<*xf32>) {
- return
-}
-// CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>)
-
-// -----
-
-// Test case: Testing BufferAssignmentCallOpConverter to see if it matches with the
-// signature of the new signature of the callee function when there are tuple typed
-// args and results. BufferAssignmentTypeConverter is set to flatten tuple typed
-// arguments. The tuple typed values should be decomposed and composed using
-// get_tuple_element and make_tuple operations of test dialect. Tensor types are
-// converted to Memref. Memref typed function results are appended to the function
-// arguments list.
-
-// CHECK-LABEL: func @callee
-func @callee(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>){
- return %arg0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
-}
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, %[[RESULT1:.*]]: memref<5xf32>)
-// CHECK-SAME: i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: linalg.copy(%[[FIRST_ELEM]], %[[RESULT0]])
-// CHECK-NEXT: linalg.copy(%[[THIRD_ELEM]], %[[RESULT1]])
-// CHECK-NEXT: return %[[SECOND_ELEM]]
-
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> tuple<tensor<2xf32>,i1, tensor<5xf32>>{
- %x0 = call @callee(%arg0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
- %y0 = call @callee(%x0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
- return %y0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
-}
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, %[[RESULT1:.*]]: memref<5xf32>)
-// CHECK-SAME: i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]])
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], %[[CALLEE_RESULT]], %[[SECOND_ALLOC]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]])
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], %[[CALLEE_RESULT]], %[[SECOND_ALLOC]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: linalg.copy(%[[FIRST_ELEM]], %[[RESULT0]])
-// CHECK-NEXT: linalg.copy(%[[THIRD_ELEM]], %[[RESULT1]])
-// CHECK-NEXT: return %[[SECOND_ELEM]]
-
-// -----
-
-// Test case: Testing BufferAssignmentFuncOpConverter and
-// BufferAssignmentReturnOpConverter to see if the return operation matches with
-// the new function signature when there are tuple typed args and results.
-// BufferAssignmentTypeConverter is set to flatten tuple typed arguments. The tuple
-// typed values should be decomposed and composed using get_tuple_element and
-// make_tuple operations of test dialect. Tensor types are converted to Memref.
-// Memref typed function results are appended to the function arguments list.
-
-// CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results
-func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>, %arg1: tensor<10xf32>, %arg2: tuple<i1, tensor<5xf32>>) -> (tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>){
- return %arg2, %arg1, %arg0 : tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>
-}
-// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<5xf32>, %[[RESULT1:.*]]: memref<10xf32>
-// CHECK-SAME: (i1, i1, f32)
-// CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]])
-// CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]])
-// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: linalg.copy(%[[SECOND_TUPLE_SECOND_ELEM]], %[[RESULT0]])
-// CHECK-NEXT: linalg.copy(%[[ARG2]], %[[RESULT1]])
-// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]]
--- /dev/null
+// RUN: mlir-opt -test-finalizing-bufferize-with-allowed-memref-results -split-input-file %s | FileCheck %s
+
+// Since memref function results are allowed to escape in this test pass, all
+// tensor-typed function results are converted to memref and remain as function
+// results. All memref-typed function results escape the deallocation phase of
+// buffer placement.
+
+// CHECK-LABEL: func @void_function_signature_conversion
+func @void_function_signature_conversion(%arg0: tensor<4x8xf32>) {
+ return
+}
+// CHECK: ({{.*}}: memref<4x8xf32>)
+
+// -----
+
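+// Tensor-typed operands and results are converted to memrefs in place;
+// existing memref and scalar operands and results pass through unchanged, and
+// the alloc escapes as a memref result.
+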
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @complex_signature_conversion
+func @complex_signature_conversion(%arg0: tensor<5xf32>, %arg1: memref<10xf32>, %arg2: i1, %arg3: f16) -> (i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16) {
+ %0 = alloc() : memref<15xf32>
+ return %arg2, %arg0, %arg1, %0, %arg3 : i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16
+}
+// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, %[[ARG2:.*]]: i1, %[[ARG3:.*]]: f16)
+// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, memref<15xf32>, f16)
+// CHECK: %[[FIRST_ALLOC:.*]] = alloc()
+// CHECK: return %[[ARG2]], %[[ARG0]], %[[ARG1]], %[[FIRST_ALLOC]], %[[ARG3]]
+
+// -----
+
+// CHECK-LABEL: func @no_signature_conversion_is_needed
+func @no_signature_conversion_is_needed(%arg0: memref<4x8xf32>) {
+ return
+}
+// CHECK: ({{.*}}: memref<4x8xf32>)
+
+// -----
+
+// CHECK-LABEL: func @no_signature_conversion_is_needed
+func @no_signature_conversion_is_needed(%arg0: i1, %arg1: f16) -> (i1, f16){
+ return %arg0, %arg1 : i1, f16
+}
+// CHECK: (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: f16) -> (i1, f16)
+// CHECK: return %[[ARG0]], %[[ARG1]]
+
+// -----
+
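+// The tensor-typed result is converted to a memref result; the argument is
+// returned directly since memref results may remain in the signature.
+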
+// CHECK-LABEL: func @simple_signature_conversion
+func @simple_signature_conversion(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> {
+ return %arg0 : tensor<4x8xf32>
+}
+// CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>) -> [[TYPE]]<[[RANK]]>
+// CHECK-NEXT: return %[[ARG0]]
+
+// -----
+
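+// Unranked tensor types are converted to unranked memref types in both the
+// argument and result positions.
+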
+// CHECK-LABEL: func @func_with_unranked_arg_and_result
+func @func_with_unranked_arg_and_result(%arg0: tensor<*xf32>) -> tensor<*xf32> {
+ return %arg0 : tensor<*xf32>
+}
+// CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>) -> memref<*xf32>
+// CHECK-NEXT: return [[ARG]] : memref<*xf32>
+
+// -----
+
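+// Tensor-typed block arguments are converted along with the function
+// signature, and the tensor-typed result stays in the result list.
+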
+// CHECK-LABEL: func @func_and_block_signature_conversion
+func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{
+ cond_br %cond, ^bb1, ^bb2
+ ^bb1:
+ br ^exit(%arg0 : tensor<2xf32>)
+ ^bb2:
+ br ^exit(%arg0 : tensor<2xf32>)
+ ^exit(%arg2: tensor<2xf32>):
+ return %arg1 : tensor<4x4xf32>
+}
+// CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]]) -> [[RESULT_TYPE:.*]]
+// CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]])
+// CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]])
+// CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]])
+// CHECK-NEXT: return %[[ARG1]]
+
+// -----
+
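+// Both the converted tensor result and the pre-existing memref result of
+// @callee remain as results; @caller forwards them without extra allocations
+// or copies.
+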
+// CHECK-LABEL: func @callee
+func @callee(%arg1: tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) {
+ %buff = alloc() : memref<2xf32>
+ return %arg1, %buff : tensor<5xf32>, memref<2xf32>
+}
+// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>) -> (memref<5xf32>, memref<2xf32>)
+// CHECK: %[[ALLOC:.*]] = alloc()
+// CHECK: return %[[CALLEE_ARG]], %[[ALLOC]]
+
+// CHECK-LABEL: func @caller
+func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
+ %x:2 = call @callee(%arg0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
+ %y:2 = call @callee(%x#0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
+ return %y#0 : tensor<5xf32>
+}
+// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>) -> memref<5xf32>
+// CHECK: %[[X:.*]]:2 = call @callee(%[[CALLER_ARG]])
+// CHECK: %[[Y:.*]]:2 = call @callee(%[[X]]#0)
+// CHECK: return %[[Y]]#0
+
+// -----
+
+// Test case: Testing BufferizeCallOpConverter to see if it matches the new
+// signature of the callee function when there are tuple-typed args and
+// results. BufferizeTypeConverter is set to flatten tuple-typed arguments.
+// The tuple-typed values are decomposed and composed using get_tuple_element
+// and make_tuple operations of the test dialect. Tensor types are converted
+// to memref. Memref-typed function results remain as function results.
+
+// CHECK-LABEL: func @callee
+func @callee(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>){
+ return %arg0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
+}
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]
+
+// CHECK-LABEL: func @caller
+func @caller(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> tuple<tensor<2xf32>,i1, tensor<5xf32>>{
+ %x0 = call @callee(%arg0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
+ %y0 = call @callee(%x0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
+ return %y0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
+}
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[ARG_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[RESULT_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[RETURN_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]
+
+// -----
+
+// Test case: Testing BufferizeFuncOpConverter and BufferizeReturnOpConverter
+// to see if the return operation matches the new function signature when
+// there are tuple-typed args and results. BufferizeTypeConverter is set to
+// flatten tuple-typed arguments. The tuple-typed values are decomposed and
+// composed using get_tuple_element and make_tuple operations of the test
+// dialect. Tensor types are converted to memref. Memref-typed function
+// results remain as function results.
+
+// CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results
+func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>, %arg1: tensor<10xf32>, %arg2: tuple<i1, tensor<5xf32>>) -> (tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>){
+ return %arg2, %arg1, %arg0 : tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>
+}
+// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>
+// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, i1, f32)
+// CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]])
+// CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]])
+// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[SECOND_TUPLE_SECOND_ELEM]], %[[ARG2]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]]
--- /dev/null
+// RUN: mlir-opt -test-finalizing-bufferize -split-input-file %s | FileCheck %s
+
+// CHECK-LABEL: func @func_signature_conversion
+func @func_signature_conversion(%arg0: tensor<4x8xf32>) {
+ return
+}
+// CHECK: ({{.*}}: memref<4x8xf32>) {
+
+// -----
+
+// Only tensor-typed function results should be converted to memref and moved
+// to the function arguments list; memref-typed function results remain as
+// function results.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @memref_in_function_results
+func @memref_in_function_results(%arg0: tensor<5xf32>, %arg1: memref<10xf32>) -> (tensor<5xf32>, memref<10xf32>, memref<15xf32>) {
+ %0 = alloc() : memref<15xf32>
+ return %arg0, %arg1, %0 : tensor<5xf32>, memref<10xf32>, memref<15xf32>
+}
+// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, %[[RESULT:.*]]: memref<5xf32>)
+// CHECK-SAME: (memref<10xf32>, memref<15xf32>)
+// CHECK: %[[FIRST_ALLOC:.*]] = alloc()
+// CHECK: linalg.copy(%[[ARG0]], %[[RESULT]])
+// CHECK: return %[[ARG1]], %[[FIRST_ALLOC]]
+
+// -----
+
+// CHECK-LABEL: func @no_signature_conversion_is_needed
+func @no_signature_conversion_is_needed(%arg0: memref<4x8xf32>) {
+ return
+}
+// CHECK: ({{.*}}: memref<4x8xf32>) {
+
+// -----
+
+// CHECK-LABEL: func @no_signature_conversion_is_needed
+func @no_signature_conversion_is_needed(%arg0: i1, %arg1: f16) -> (i1, f16){
+ return %arg0, %arg1 : i1, f16
+}
+// CHECK: (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: f16) -> (i1, f16)
+// CHECK: return %[[ARG0]], %[[ARG1]]
+
+// -----
+
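+// Tensor-typed results are appended to the argument list as output buffers
+// and materialized with linalg.copy; the scalar results stay in the result
+// list.
+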
+// CHECK-LABEL: func @complex_signature_conversion
+func @complex_signature_conversion(%arg0: tensor<4x8xf32>, %arg1: i1, %arg2: tensor<5x5xf64>,%arg3: f16) -> (i1, tensor<5x5xf64>, f16, tensor<4x8xf32>) {
+ return %arg1, %arg2, %arg3, %arg0 : i1, tensor<5x5xf64>, f16, tensor<4x8xf32>
+}
+// CHECK: (%[[ARG0:.*]]: memref<4x8xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5x5xf64>, %[[ARG3:.*]]: f16,
+// CHECK-SAME: %[[RESULT1:.*]]: memref<5x5xf64>, %[[RESULT2:.*]]: memref<4x8xf32>) -> (i1, f16) {
+// CHECK-NEXT: linalg.copy(%[[ARG2]], %[[RESULT1]])
+// CHECK-NEXT: linalg.copy(%[[ARG0]], %[[RESULT2]])
+// CHECK-NEXT: return %[[ARG1]], %[[ARG3]]
+
+// -----
+
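+// Returning the tensor argument lowers to a linalg.copy into the appended
+// output buffer followed by a void return.
+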
+// CHECK-LABEL: func @non_void_to_void_return_op_converter
+func @non_void_to_void_return_op_converter(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> {
+ return %arg0 : tensor<4x8xf32>
+}
+// CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>, %[[RESULT:.*]]: [[TYPE]]<[[RANK]]>) {
+// CHECK-NEXT: linalg.copy(%[[ARG0]], %[[RESULT]])
+// CHECK-NEXT: return
+
+// -----
+
+// CHECK-LABEL: func @func_and_block_signature_conversion
+func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{
+ cond_br %cond, ^bb1, ^bb2
+ ^bb1:
+ br ^exit(%arg0 : tensor<2xf32>)
+ ^bb2:
+ br ^exit(%arg0 : tensor<2xf32>)
+ ^exit(%arg2: tensor<2xf32>):
+ return %arg1 : tensor<4x4xf32>
+}
+// CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]], %[[RESULT:.*]]: [[RESULT_TYPE:.*]]) {
+// CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]])
+// CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]])
+// CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]])
+// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[RESULT]])
+// CHECK-NEXT: return
+
+// -----
+
+// Test case: Checking BufferizeCallOpConverter, BufferizeFuncOpConverter, and
+// BufferizeReturnOpConverter together. The signature of `callee` after
+// signature conversion would be:
+
+// func @callee(%arg0: memref<5xf32>, %arg1: memref<5xf32>) -> ()
+
+// The operands and results of the caller and return operations must be
+// matched against this new signature.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @callee
+func @callee(%arg0: tensor<5xf32>) -> tensor<5xf32> {
+ return %arg0 : tensor<5xf32>
+}
+// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>, %[[CALLEE_RESULT:.*]]: memref<5xf32>)
+// CHECK: linalg.copy(%[[CALLEE_ARG]], %[[CALLEE_RESULT]])
+// CHECK: return
+
+// CHECK-LABEL: func @caller
+func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
+ %x = call @callee(%arg0) : (tensor<5xf32>) -> tensor<5xf32>
+ %y = call @callee(%x) : (tensor<5xf32>) -> tensor<5xf32>
+ return %y : tensor<5xf32>
+}
+// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>, %[[CALLER_RESULT:.*]]: memref<5xf32>)
+// CHECK: %[[FIRST_ALLOC:.*]] = alloc()
+// CHECK: call @callee(%[[CALLER_ARG]], %[[FIRST_ALLOC]])
+// CHECK: %[[SECOND_ALLOC:.*]] = alloc()
+// CHECK: call @callee(%[[FIRST_ALLOC]], %[[SECOND_ALLOC]])
+// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[CALLER_RESULT]])
+// CHECK: return
+
+// -----
+
+// Test case: Checking BufferizeCallOpConverter, BufferizeFuncOpConverter, and
+// BufferizeReturnOpConverter together on functions that also have memref-typed
+// results. The signature of `callee` after signature conversion would be:
+
+// func @callee(%arg0: memref<5xf32>, %arg1: memref<5xf32>) -> memref<2xf32>
+
+// where %arg0 is the input, %arg1 is the output buffer, and the original
+// memref-typed result remains as the function result. The rewriter then
+// matches the caller's signature with the callee's: two buffers are allocated
+// in place of %x0 and %y0 and passed to the calls as output buffers, while
+// %x1 and %y1 remain as the callers' results.
+
+
+// CHECK-LABEL: func @callee
+func @callee(%arg1: tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) {
+ %buff = alloc() : memref<2xf32>
+ return %arg1, %buff : tensor<5xf32>, memref<2xf32>
+}
+// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>, %[[CALLEE_RESULT:.*]]: memref<5xf32>)
+// CHECK-SAME: memref<2xf32>
+// CHECK: %[[ALLOC:.*]] = alloc()
+// CHECK: linalg.copy(%[[CALLEE_ARG]], %[[CALLEE_RESULT]])
+// CHECK: return %[[ALLOC]]
+
+
+// CHECK-LABEL: func @caller
+func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
+ %x0, %x1 = call @callee(%arg0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
+ %y0, %y1 = call @callee(%x0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
+ return %y0 : tensor<5xf32>
+}
+// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>, %[[CALLER_RESULT:.*]]: memref<5xf32>)
+// CHECK: %[[X0:.*]] = alloc()
+// CHECK: %[[X1:.*]] = call @callee(%[[CALLER_ARG]], %[[X0]])
+// CHECK: %[[Y0:.*]] = alloc()
+// CHECK: %[[Y1:.*]] = call @callee(%[[X0]], %[[Y0]])
+// CHECK: linalg.copy(%[[Y0]], %[[CALLER_RESULT]])
+// CHECK: return
+
+// -----
+
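+// The unranked tensor argument is converted to an unranked memref argument.
+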
+// CHECK-LABEL: func @func_with_unranked_arg
+func @func_with_unranked_arg(%arg0: tensor<*xf32>) {
+ return
+}
+// CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>)
+
+// -----
+
+// Test case: Testing BufferizeCallOpConverter to see if it matches the new
+// signature of the callee function when there are tuple-typed args and
+// results. BufferizeTypeConverter is set to flatten tuple-typed arguments.
+// The tuple-typed values are decomposed and composed using get_tuple_element
+// and make_tuple operations of the test dialect. Tensor types are converted
+// to memref. Memref-typed function results are appended to the function
+// arguments list.
+
+// CHECK-LABEL: func @callee
+func @callee(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>){
+ return %arg0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
+}
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, %[[RESULT1:.*]]: memref<5xf32>)
+// CHECK-SAME: i1
+// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: linalg.copy(%[[FIRST_ELEM]], %[[RESULT0]])
+// CHECK-NEXT: linalg.copy(%[[THIRD_ELEM]], %[[RESULT1]])
+// CHECK-NEXT: return %[[SECOND_ELEM]]
+
+
+// CHECK-LABEL: func @caller
+func @caller(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> tuple<tensor<2xf32>,i1, tensor<5xf32>>{
+ %x0 = call @callee(%arg0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
+ %y0 = call @callee(%x0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
+ return %y0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
+}
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, %[[RESULT1:.*]]: memref<5xf32>)
+// CHECK-SAME: i1
+// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
+// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc()
+// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]])
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1
+// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], %[[CALLEE_RESULT]], %[[SECOND_ALLOC]])
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
+// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc()
+// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]])
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1
+// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], %[[CALLEE_RESULT]], %[[SECOND_ALLOC]])
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: linalg.copy(%[[FIRST_ELEM]], %[[RESULT0]])
+// CHECK-NEXT: linalg.copy(%[[THIRD_ELEM]], %[[RESULT1]])
+// CHECK-NEXT: return %[[SECOND_ELEM]]
+
+// -----
+
+// Test case: Testing BufferizeFuncOpConverter and BufferizeReturnOpConverter
+// to see if the return operation matches the new function signature when
+// there are tuple-typed args and results. BufferizeTypeConverter is set to
+// flatten tuple-typed arguments. The tuple-typed values are decomposed and
+// composed using get_tuple_element and make_tuple operations of the test
+// dialect. Tensor types are converted to memref. Memref-typed function
+// results are appended to the function arguments list.
+
+// CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results
+func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>, %arg1: tensor<10xf32>, %arg2: tuple<i1, tensor<5xf32>>) -> (tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>){
+ return %arg2, %arg1, %arg0 : tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>
+}
+// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<5xf32>, %[[RESULT1:.*]]: memref<10xf32>
+// CHECK-SAME: (i1, i1, f32)
+// CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]])
+// CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]])
+// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: linalg.copy(%[[SECOND_TUPLE_SECOND_ELEM]], %[[RESULT0]])
+// CHECK-NEXT: linalg.copy(%[[ARG2]], %[[RESULT1]])
+// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]]
# Exclude tests from libMLIR.so
add_mlir_library(MLIRTestTransforms
TestAffineLoopParametricTiling.cpp
- TestBufferPlacement.cpp
TestExpandMemRefReshape.cpp
TestExpandTanh.cpp
TestCallGraph.cpp
TestConvertGPUKernelToHsaco.cpp
TestDominance.cpp
TestDynamicPipeline.cpp
+ TestFinalizingBufferize.cpp
TestLoopFusion.cpp
TestGpuMemoryPromotion.cpp
TestGpuParallelLoopMapping.cpp
+++ /dev/null
-//===- TestBufferPlacement.cpp - Test for buffer placement ------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements logic for testing buffer placement including its
-// utility converters.
-//
-//===----------------------------------------------------------------------===//
-
-#include "TestDialect.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
-#include "mlir/IR/Function.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Transforms/Bufferize.h"
-
-using namespace mlir;
-
-namespace {
-/// This pass tests the computeAllocPosition helper method and bufferize
-/// operation converters. Furthermore, this pass converts linalg operations on
-/// tensors to linalg operations on buffers to prepare them for the
-/// BufferPlacement pass that can be applied afterwards.
-/// `allowMemrefFunctionResults` informs the buffer placement to allow functions
-/// that have memref typed results. Buffer assignment operation converters will
-/// be adapted respectively. It will also allow memref typed results to escape
-/// from the deallocation.
-template <bool allowMemrefFunctionResults>
-struct TestBufferPlacementPreparationPass
- : mlir::PassWrapper<
- TestBufferPlacementPreparationPass<allowMemrefFunctionResults>,
- OperationPass<ModuleOp>> {
-
- /// Converts tensor-type generic linalg operations to memref ones using
- /// bufferize.
- /// TODO: Avoid the copy-pasta by exposing the pattern from BufferPlacement.h
- /// This is limited by not wanting BufferPlacement to depend on Linalg. Fixing
- /// this probably requires an OpConversionPattern over generic Operation*. For
- /// now only RewritePattern but not ConversionPattern allow this.
-
- class GenericOpConverter
- : public BufferizeOpConversionPattern<linalg::GenericOp> {
- public:
- using BufferizeOpConversionPattern<
- linalg::GenericOp>::BufferizeOpConversionPattern;
-
- LogicalResult
- matchAndRewrite(linalg::GenericOp op, ArrayRef<Value> operands,
- ConversionPatternRewriter &rewriter) const final {
- linalg::GenericOpAdaptor adaptor(operands,
- op.getOperation()->getAttrDictionary());
-
- // All inputs need to be turned into buffers first. Until then, bail out.
- if (llvm::any_of(adaptor.inputs(), [](Value in) {
- return !in.getType().isa<MemRefType>();
- }))
- return failure();
-
- // All init_tensors need to be turned into buffers first. Until then, bail
- // out.
- if (llvm::any_of(adaptor.init_tensors(), [](Value in) {
- return !in.getType().isa<MemRefType>();
- }))
- return failure();
-
- Location loc = op.getLoc();
- SmallVector<Value, 2> newOutputBuffers;
- newOutputBuffers.reserve(op.getNumOutputs());
- newOutputBuffers.append(adaptor.output_buffers().begin(),
- adaptor.output_buffers().end());
-
- // Update all types to memref types.
- // Assume the init tensors fold onto the first results.
- // TODO: update this assumption because the reality is more complex under
- // linalg on tensor based transformations.
- for (auto en : llvm::enumerate(op.getResultTypes())) {
- auto type = en.value().cast<ShapedType>();
- if (!type.hasStaticShape())
- return rewriter.notifyMatchFailure(
- op, "dynamic shapes not currently supported");
- auto memrefType =
- MemRefType::get(type.getShape(), type.getElementType());
- bool foldedInitTensor = en.index() < op.getNumInitTensors();
- if (foldedInitTensor) {
- // Dealing with an init tensor requires distinguishing between 1-use
- // and many-use cases which would create aliasing and WAR hazards.
- Value initTensor = op.getInitTensor(en.index());
- Value initBuffer = adaptor.init_tensors()[en.index()];
- if (initTensor.hasOneUse()) {
- newOutputBuffers.push_back(initBuffer);
- continue;
- }
- auto alloc = rewriter.create<AllocOp>(loc, memrefType);
- rewriter.create<linalg::CopyOp>(loc, initBuffer, alloc);
- newOutputBuffers.push_back(alloc);
- } else {
- auto alloc = rewriter.create<AllocOp>(loc, memrefType);
- newOutputBuffers.push_back(alloc);
- }
- }
-
- // Generate a new linalg operation that works on buffers.
- auto linalgOp = rewriter.create<linalg::GenericOp>(
- loc,
- /*resultTensorTypes=*/ArrayRef<Type>{},
- /*inputs=*/adaptor.inputs(),
- /*outputBuffers=*/newOutputBuffers,
- /*initTensors=*/ValueRange{}, op.indexing_maps(), op.iterator_types(),
- op.docAttr(), op.library_callAttr(), op.symbol_sourceAttr());
-
- // Create a new block in the region of the new Generic Op.
- Block &oldBlock = op.getRegion().front();
- Region &newRegion = linalgOp.region();
- Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(),
- oldBlock.getArgumentTypes());
-
- // Add the result arguments that do not come from init_tensors to the new
- // block.
- // TODO: update this assumption because the reality is more complex under
- // linalg on tensor based transformations.
- for (Value v : ValueRange(newOutputBuffers)
- .drop_front(adaptor.init_tensors().size()))
- newBlock->addArgument(v.getType().cast<MemRefType>().getElementType());
-
- // Clone the body of the old block to the new block.
- BlockAndValueMapping mapping;
- for (unsigned i = 0; i < oldBlock.getNumArguments(); i++)
- mapping.map(oldBlock.getArgument(i), newBlock->getArgument(i));
-
- OpBuilder::InsertionGuard guard(rewriter);
- rewriter.setInsertionPointToEnd(newBlock);
- for (auto &op : oldBlock.getOperations()) {
- Operation *clonedOp = rewriter.clone(op, mapping);
- mapping.map(op.getResults(), clonedOp->getResults());
- }
-
- // Replace the results of the old op with the new output buffers.
- rewriter.replaceOp(op, newOutputBuffers);
- return success();
- }
- };
-
- void populateTensorLinalgToBufferLinalgConversionPattern(
- MLIRContext *context, BufferizeTypeConverter &converter,
- OwningRewritePatternList &patterns) {
- populateWithBufferizeOpConversionPatterns<mlir::ReturnOp, mlir::ReturnOp,
- linalg::CopyOp>(
- context, converter, patterns);
- patterns.insert<GenericOpConverter>(context, converter);
- }
-
- void getDependentDialects(DialectRegistry ®istry) const override {
- registry.insert<TestDialect>();
- registry.insert<linalg::LinalgDialect>();
- }
-
- void runOnOperation() override {
- MLIRContext &context = this->getContext();
- ConversionTarget target(context);
- BufferizeTypeConverter converter;
-
- // Mark all Standard operations legal.
- target.addLegalDialect<StandardOpsDialect>();
- target.addLegalOp<MakeTupleOp>();
- target.addLegalOp<GetTupleElementOp>();
- target.addLegalOp<ModuleOp>();
- target.addLegalOp<ModuleTerminatorOp>();
-
- // Mark all Linalg operations illegal as long as they work on tensors.
- auto isLegalOperation = [&](Operation *op) {
- return converter.isLegal(op);
- };
- target.addDynamicallyLegalDialect<linalg::LinalgDialect>(isLegalOperation);
-
- // Mark Standard Return operations illegal as long as one operand is tensor.
- target.addDynamicallyLegalOp<mlir::ReturnOp>([&](mlir::ReturnOp returnOp) {
- return converter.isLegal(returnOp.getOperandTypes());
- });
-
- // Mark Standard Call Operation illegal as long as it operates on tensor.
- target.addDynamicallyLegalOp<mlir::CallOp>(
- [&](mlir::CallOp callOp) { return converter.isLegal(callOp); });
-
- // Mark the function whose arguments are in tensor-type illegal.
- target.addDynamicallyLegalOp<FuncOp>([&](FuncOp funcOp) {
- return converter.isSignatureLegal(funcOp.getType()) &&
- converter.isLegal(&funcOp.getBody());
- });
-
- auto kind = allowMemrefFunctionResults
- ? BufferizeTypeConverter::KeepAsFunctionResult
- : BufferizeTypeConverter::AppendToArgumentsList;
- converter.setResultConversionKind<RankedTensorType, MemRefType>(kind);
- converter.setResultConversionKind<UnrankedTensorType, UnrankedMemRefType>(
- kind);
-
- converter.addDecomposeTypeConversion(
- [](TupleType tupleType, SmallVectorImpl<Type> &types) {
- tupleType.getFlattenedTypes(types);
- return success();
- });
-
- converter.addArgumentMaterialization(
- [](OpBuilder &builder, TupleType resultType, ValueRange inputs,
- Location loc) -> Optional<Value> {
- if (inputs.size() == 1)
- return llvm::None;
- TypeRange TypeRange = inputs.getTypes();
- SmallVector<Type, 2> types(TypeRange.begin(), TypeRange.end());
- TupleType tuple = TupleType::get(types, builder.getContext());
- mlir::Value value = builder.create<MakeTupleOp>(loc, tuple, inputs);
- return value;
- });
-
- converter.addDecomposeValueConversion([](OpBuilder &builder, Location loc,
- TupleType resultType, Value value,
- SmallVectorImpl<Value> &values) {
- for (unsigned i = 0, e = resultType.size(); i < e; ++i) {
- Value res = builder.create<GetTupleElementOp>(
- loc, resultType.getType(i), value, builder.getI32IntegerAttr(i));
- values.push_back(res);
- }
- return success();
- });
-
- OwningRewritePatternList patterns;
- populateTensorLinalgToBufferLinalgConversionPattern(&context, converter,
- patterns);
- if (failed(applyFullConversion(this->getOperation(), target,
- std::move(patterns))))
- this->signalPassFailure();
- };
-};
-} // end anonymous namespace
-
-namespace mlir {
-void registerTestBufferPlacementPreparationPass() {
- PassRegistration<
- TestBufferPlacementPreparationPass</*allowMemrefFunctionResults=*/false>>(
- "test-buffer-placement-preparation",
- "Tests buffer placement helper methods including its "
- "operation-conversion patterns");
-}
-
-void registerTestPreparationPassWithAllowedMemrefResults() {
- PassRegistration<
- TestBufferPlacementPreparationPass</*allowMemrefFunctionResults=*/true>>(
- "test-buffer-placement-preparation-with-allowed-memref-results",
- "Tests the helper operation converters of buffer placement for allowing "
- "functions to have memref typed results.");
-}
-} // end namespace mlir
--- /dev/null
+//===- TestFinalizingBufferize.cpp - Finalizing bufferization ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that exercises the functionality of finalizing
+// bufferizations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestDialect.h"
+#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/IR/Function.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/Bufferize.h"
+
+using namespace mlir;
+
+namespace {
+/// This pass is a test for "finalizing" bufferize conversions.
+///
+/// A "finalizing" bufferize conversion is one that performs a "full" conversion
+/// and expects all tensors to be gone from the program. This in particular
+/// involves rewriting funcs (including block arguments of the contained
+/// region), calls, and returns. The unique property of finalizing bufferization
+/// passes is that they cannot be done via a local transformation with suitable
+/// materializations to ensure composability (as other bufferization passes do).
+/// For example, if a call is rewritten, the callee needs to be rewritten as
+/// well; otherwise the IR ends up invalid. Thus, finalizing bufferization passes
+/// require an atomic change to the entire program (e.g. the whole module).
+///
+/// `allowMemrefFunctionResults` sets the buffer finalization policy to allow
+/// functions to have memref typed results. Patterns involved in converting
+/// func/call/return respect this policy to ensure a consistent atomic
+/// conversion of the entire module. `allowMemrefFunctionResults` also allows
+/// memref typed results to escape the deallocation phase.
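+///
+/// As an illustrative sketch (the function name and types below are made up,
+/// not taken from any existing test), a function such as
+///
+///   func @process(%arg0: tensor<4xf32>) -> tensor<4xf32>
+///
+/// is roughly rewritten to
+///
+///   func @process(%arg0: memref<4xf32>) -> memref<4xf32>
+///
+/// when memref function results are allowed, whereas the default policy
+/// instead appends the converted result to the argument list:
+///
+///   func @process(%arg0: memref<4xf32>, %arg1: memref<4xf32>)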
+///
+/// TODO: Split out BufferizeFinalizationPolicy from BufferizeTypeConverter.
+template <bool allowMemrefFunctionResults>
+struct TestFinalizingBufferizePass
+ : mlir::PassWrapper<TestFinalizingBufferizePass<allowMemrefFunctionResults>,
+ OperationPass<ModuleOp>> {
+
+ void getDependentDialects(DialectRegistry &registry) const override {
+ registry.insert<TestDialect>();
+ registry.insert<linalg::LinalgDialect>();
+ }
+
+ void runOnOperation() override {
+ MLIRContext &context = this->getContext();
+ ConversionTarget target(context);
+ BufferizeTypeConverter converter;
+
+ // Mark all Standard operations legal.
+ target.addLegalDialect<StandardOpsDialect>();
+ target.addLegalOp<linalg::CopyOp>();
+ target.addLegalOp<MakeTupleOp>();
+ target.addLegalOp<GetTupleElementOp>();
+ target.addLegalOp<ModuleOp>();
+ target.addLegalOp<ModuleTerminatorOp>();
+
+ // Mark Standard Return operations illegal as long as any operand is a tensor.
+ target.addDynamicallyLegalOp<mlir::ReturnOp>([&](mlir::ReturnOp returnOp) {
+ return converter.isLegal(returnOp.getOperandTypes());
+ });
+
+ // Mark Standard Call operations illegal as long as they operate on tensors.
+ target.addDynamicallyLegalOp<mlir::CallOp>(
+ [&](mlir::CallOp callOp) { return converter.isLegal(callOp); });
+
+ // Mark functions illegal as long as their signatures or bodies contain
+ // tensor types.
+ target.addDynamicallyLegalOp<FuncOp>([&](FuncOp funcOp) {
+ return converter.isSignatureLegal(funcOp.getType()) &&
+ converter.isLegal(&funcOp.getBody());
+ });
+
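+ // Select whether converted function results are kept as function results or
+ // appended to the argument list, depending on `allowMemrefFunctionResults`.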
+ auto kind = allowMemrefFunctionResults
+ ? BufferizeTypeConverter::KeepAsFunctionResult
+ : BufferizeTypeConverter::AppendToArgumentsList;
+ converter.setResultConversionKind<RankedTensorType, MemRefType>(kind);
+ converter.setResultConversionKind<UnrankedTensorType, UnrankedMemRefType>(
+ kind);
+
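+ // Decompose tuple types into their flattened element types.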
+ converter.addDecomposeTypeConversion(
+ [](TupleType tupleType, SmallVectorImpl<Type> &types) {
+ tupleType.getFlattenedTypes(types);
+ return success();
+ });
+
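+ // Rebuild a tuple from its decomposed elements when a tuple-typed value is
+ // materialized from multiple converted arguments.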
+ converter.addArgumentMaterialization(
+ [](OpBuilder &builder, TupleType resultType, ValueRange inputs,
+ Location loc) -> Optional<Value> {
+ if (inputs.size() == 1)
+ return llvm::None;
+ TypeRange typeRange = inputs.getTypes();
+ SmallVector<Type, 2> types(typeRange.begin(), typeRange.end());
+ TupleType tuple = TupleType::get(types, builder.getContext());
+ mlir::Value value = builder.create<MakeTupleOp>(loc, tuple, inputs);
+ return value;
+ });
+
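+ // Decompose a tuple-typed value into its elements using GetTupleElementOp.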
+ converter.addDecomposeValueConversion([](OpBuilder &builder, Location loc,
+ TupleType resultType, Value value,
+ SmallVectorImpl<Value> &values) {
+ for (unsigned i = 0, e = resultType.size(); i < e; ++i) {
+ Value res = builder.create<GetTupleElementOp>(
+ loc, resultType.getType(i), value, builder.getI32IntegerAttr(i));
+ values.push_back(res);
+ }
+ return success();
+ });
+
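+ // Populate the func/call/return conversion patterns; linalg.copy is the op
+ // used to copy into output arguments appended in place of function results.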
+ OwningRewritePatternList patterns;
+ populateWithBufferizeOpConversionPatterns<mlir::ReturnOp, mlir::ReturnOp,
+ linalg::CopyOp>(
+ &context, converter, patterns);
+ if (failed(applyFullConversion(this->getOperation(), target,
+ std::move(patterns))))
+ this->signalPassFailure();
+ };
+};
+} // end anonymous namespace
+
+namespace mlir {
+void registerTestFinalizingBufferizePass() {
+ PassRegistration<
+ TestFinalizingBufferizePass</*allowMemrefFunctionResults=*/false>>(
+ "test-finalizing-bufferize", "Tests finalizing bufferize conversions");
+}
+
+void registerTestPreparationPassWithAllowedMemrefResults() {
+ PassRegistration<
+ TestFinalizingBufferizePass</*allowMemrefFunctionResults=*/true>>(
+ "test-finalizing-bufferize-with-allowed-memref-results",
+ "Tests finalizing buffierize conversions, allowing functions to have "
+ "memref typed results.");
+}
+} // end namespace mlir
void registerTestAffineLoopParametricTilingPass();
void registerTestAffineLoopUnswitchingPass();
void registerTestAllReduceLoweringPass();
-void registerTestBufferPlacementPreparationPass();
void registerTestCallGraphPass();
void registerTestConstantFold();
void registerTestConvVectorization();
void registerTestDynamicPipelinePass();
void registerTestExpandMemRefReshapePass();
void registerTestExpandTanhPass();
+void registerTestFinalizingBufferizePass();
void registerTestFunc();
void registerTestGpuMemoryPromotionPass();
void registerTestGpuParallelLoopMappingPass();
registerTestConvertGPUKernelToHsacoPass();
#endif
registerTestAffineLoopParametricTilingPass();
- registerTestBufferPlacementPreparationPass();
registerTestDominancePass();
registerTestDynamicPipelinePass();
+ registerTestFinalizingBufferizePass();
registerTestFunc();
registerTestExpandTanhPass();
registerTestExpandMemRefReshapePass();