/// Cross-op multiple uses of %A: the first vector.transfer_write, which has interfering reads, must alloc.
// CHECK: %[[ALLOC:.*]] = memref.alloc
- // CHECK: linalg.copy({{.*}}, %[[ALLOC]])
+ // CHECK: memref.copy {{.*}}, %[[ALLOC]]
// CHECK-NEXT: vector.transfer_write {{.*}}, %[[ALLOC]]
%r0 = vector.transfer_write %vec, %A[%c0] : vector<4xf32>, tensor<?xf32>
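// A minimal sketch of the bufferized form, with hypothetical names
// (%A_buf for the bufferized %A, %d for its dynamic size): the
// interfering read forces the write into a fresh copy of %A.
//   %alloc = memref.alloc(%d) : memref<?xf32>
//   memref.copy %A_buf, %alloc : memref<?xf32> to memref<?xf32>
//   vector.transfer_write %vec, %alloc[%c0] : vector<4xf32>, memref<?xf32>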
// CHECK: %[[REALLOC1:.*]] = memref.alloc
// Alloc and copy the whole result tensor. Copy the tensor.insert_slice source into a subview.
- // CHECK: linalg.copy(%[[A0]], %[[REALLOC3]]
+ // CHECK: memref.copy %[[A0]], %[[REALLOC3]]
// CHECK: %[[SV_A0:.*]] = memref.subview %[[REALLOC3]]
- // CHECK: linalg.copy(%[[t0]], %[[SV_A0]])
+ // CHECK: memref.copy %[[t0]], %[[SV_A0]]
%r0 = tensor.insert_slice %t0 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
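// A sketch of this out-of-place pattern, with hypothetical names
// (%A0_buf, %t0_buf, %d) and layout maps elided: clone the whole
// destination, then copy the source into a subview of the clone.
//   %alloc = memref.alloc(%d) : memref<?xf32>
//   memref.copy %A0_buf, %alloc : memref<?xf32> to memref<?xf32>
//   %sv = memref.subview %alloc[0] [4] [1] : memref<?xf32> to memref<4xf32>
//   memref.copy %t0_buf, %sv : memref<4xf32> to memref<4xf32>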
// Alloc and copy the whole result tensor. Copy the tensor.insert_slice source into a subview.
- // CHECK: linalg.copy(%[[A0]]
+ // CHECK: memref.copy %[[A0]]
// CHECK: %[[SV_A0_2:.*]] = memref.subview %[[REALLOC2]]
- // CHECK: linalg.copy(%[[t1]], %[[SV_A0_2]])
+ // CHECK: memref.copy %[[t1]], %[[SV_A0_2]]
%r1 = tensor.insert_slice %t1 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
// Still alloc the large tensor because %A1 is read later. Copy the tensor.insert_slice source into a subview.
- // CHECK: linalg.copy(%[[A1]]
+ // CHECK: memref.copy %[[A1]]
// CHECK: %[[SV_A1:.*]] = memref.subview %[[REALLOC1]]
- // CHECK: linalg.copy(%[[t0]], %[[SV_A1]])
+ // CHECK: memref.copy %[[t0]], %[[SV_A1]]
%r2 = tensor.insert_slice %t0 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
// Do not realloc the large tensor. Copy the tensor.insert_slice source into a subview.
// CHECK-NOT: alloc
// CHECK: %[[SV_A1_2:.*]] = memref.subview %[[A1]]
- // CHECK: linalg.copy(%[[t1]], %[[SV_A1_2]])
+ // CHECK: memref.copy %[[t1]], %[[SV_A1_2]]
%r3 = tensor.insert_slice %t1 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
// CHECK: return %[[REALLOC3]], %[[REALLOC2]], %[[REALLOC1]] :
// CHECK-NOT: alloc
// CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
- // CHECK: linalg.copy(%[[t]], %[[SV_A]])
+ // CHECK: memref.copy %[[t]], %[[SV_A]]
%r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
/// Overwrite A inplace.
// CHECK-NOT: alloc
// CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
/// Overwrite A inplace by copying into the subview.
- // CHECK: linalg.copy(%[[t]], %[[SV_A]])
+ // CHECK: memref.copy %[[t]], %[[SV_A]]
%r1 = tensor.insert_slice %t into %r0[0][4][1] : tensor<4xf32> into tensor<?xf32>
// CHECK: return
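// The in-place case for contrast (hypothetical %A_buf, %t_buf;
// layouts elided): no allocation, only a copy into a subview of the
// destination buffer.
//   %sv = memref.subview %A_buf[0] [4] [1] : memref<?xf32> to memref<4xf32>
//   memref.copy %t_buf, %sv : memref<4xf32> to memref<4xf32>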
-> tensor<?xf32>
{
// CHECK: %[[ALLOC:.*]] = memref.alloc(%{{.*}}) {alignment = 128 : i64} : memref<?xf32>
- // CHECK: linalg.copy(%[[A]], %[[ALLOC]]) : memref<?xf32{{.*}}, memref<?xf32>
+ // CHECK: memref.copy %[[A]], %[[ALLOC]] : memref<?xf32{{.*}} to memref<?xf32>
// CHECK: %[[SV:.*]] = memref.subview %[[ALLOC]][0] [4] [1] : memref<?xf32> to memref<4xf32>
- // CHECK: linalg.copy(%[[t]], %[[SV]]) : memref<4xf32, #map>, memref<4xf32>
+ // CHECK: memref.copy %[[t]], %[[SV]] : memref<4xf32, #map> to memref<4xf32>
// CHECK: memref.dealloc %[[ALLOC]] : memref<?xf32>
%r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
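// Because this clone does not escape the function, bufferization
// pairs the alloc with a dealloc at the end of the region (sketch,
// hypothetical %d):
//   %alloc = memref.alloc(%d) {alignment = 128 : i64} : memref<?xf32>
//   // ... copies into %alloc as checked above ...
//   memref.dealloc %alloc : memref<?xf32>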
{
// CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
// CHECK: %[[CASTED:.*]] = memref.cast %[[ALLOC_FOR_A]]
- // CHECK: linalg.copy(%[[A]], %[[ALLOC_FOR_A]])
+ // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]]
// The first scf.for remains but just turns into dead code.
%r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
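// A sketch of the loop after bufferization (hypothetical %A_buf, %d):
// the iter_arg collapses onto a single buffer, copied once up front
// because %A is not writable, and the tensor yields disappear.
//   %alloc = memref.alloc(%d) : memref<?xf32>
//   memref.copy %A_buf, %alloc : memref<?xf32> to memref<?xf32>
//   scf.for %i = %lb to %ub step %step {
//     // body updates %alloc in place
//   }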
{
// CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
// CHECK: %[[CASTED:.*]] = memref.cast %[[ALLOC_FOR_A]]
- // CHECK: linalg.copy(%[[A]], %[[ALLOC_FOR_A]])
+ // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]]
// CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1]
// CHECK: %[[svB:.*]] = memref.subview %[[B]][0] [4] [1]
-> (tensor<?xf32>, tensor<?xf32>)
{
// %ttA bufferizes to a direct copy of %C into %svA
- // CHECK: linalg.copy(%[[C]], %[[svA]])
+ // CHECK: memref.copy %[[C]], %[[svA]]
%ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
// %ttB bufferizes to a direct copy of %C into %svB
- // CHECK: linalg.copy(%[[C]], %[[svB]])
+ // CHECK: memref.copy %[[C]], %[[svB]]
%ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
// CHECK-NOT: scf.yield
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK: %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]>
-// CHECK: linalg.copy(%[[A]], %[[alloc]])
+// CHECK: memref.copy %[[A]], %[[alloc]]
// CHECK: call @some_external_func(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
call @some_external_func(%A) : (tensor<4xi32>) -> ()
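// A sketch with a hypothetical %A_buf: the external callee is opaque
// and may write, so it receives a writable private copy; the cast
// erases the static layout to match the callee's signature.
//   %alloc = memref.alloc() : memref<4xi32>
//   %casted = memref.cast %alloc : memref<4xi32> to memref<4xi32, #map>
//   memref.copy %A_buf, %alloc : memref<4xi32, #map> to memref<4xi32>
//   call @some_external_func(%casted) : (memref<4xi32, #map>) -> ()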
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK: %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]>
-// CHECK: linalg.copy(%[[A]], %[[alloc]])
+// CHECK: memref.copy %[[A]], %[[alloc]]
// CHECK: call @some_external_func_within_scf_execute(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
scf.execute_region {
call @some_external_func_within_scf_execute(%A) : (tensor<4xi32>) -> ()
-> (tensor<?xf32>, tensor<?xf32>)
{
// CHECK-NEXT: %[[SVA:.*]] = memref.subview %[[A]]
- // CHECK-NEXT: linalg.copy(%[[C]], %[[SVA]]) : memref<4xf32, #[[$DYN_1D_MAP]]>, memref<4xf32, #[[$DYN_1D_MAP]]>
+ // CHECK-NEXT: memref.copy %[[C]], %[[SVA]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
%ttA = tensor.insert_slice %C into %tA[%i][4][1] : tensor<4xf32> into tensor<?xf32>
// CHECK-NEXT: %[[SVB:.*]] = memref.subview %[[B]]
- // CHECK-NEXT: linalg.copy(%[[C]], %[[SVB]]) : memref<4xf32, #[[$DYN_1D_MAP]]>, memref<4xf32, #[[$DYN_1D_MAP]]>
+ // CHECK-NEXT: memref.copy %[[C]], %[[SVB]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
%ttB = tensor.insert_slice %C into %tB[%i][4][1] : tensor<4xf32> into tensor<?xf32>
// scf.yield is empty and is elided
// %r0#0 requires a copy because we have no idea what the function is doing.
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
-// CHECK: linalg.copy(%[[B]], %[[alloc]])
+// CHECK: memref.copy %[[B]], %[[alloc]]
// CHECK-NEXT: call @some_external_func(%[[casted]]) : (memref<?xf32, #[[$DYN_1D_MAP]]>) -> ()
call @some_external_func(%r0#0) : (tensor<?xf32>) -> ()
iterators["parallel"]
{
// CHECK-NOT: alloc
- // CHECK: linalg.copy(%[[B]], %[[A]])
+ // CHECK: memref.copy %[[B]], %[[A]]
linalg.yield %B : tensor<?xf32>
// CHECK: linalg.yield
// CHECK-NOT: tensor
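// Sketch (hypothetical %A_buf, %B_buf): yielding %B writes it into
// the loop's out buffer, so the yield lowers to a copy and keeps no
// tensor operand.
//   memref.copy %B_buf, %A_buf : memref<?xf32> to memref<?xf32>
//   linalg.yield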
// CHECK: %[[ALLOC_B:.*]] = memref.alloc
// CHECK: %[[CASTED_B:.*]] = memref.cast %[[ALLOC_B]]
// CHECK: %[[ALLOC_A:.*]] = memref.alloc
-// CHECK: linalg.copy(%[[A]], %[[ALLOC_A]])
-// CHECK: linalg.copy(%[[B]], %[[ALLOC_B]])
-// CHECK: linalg.copy(%[[C]], %[[ALLOC_C]])
+// CHECK: memref.copy %[[A]], %[[ALLOC_A]]
+// CHECK: memref.copy %[[B]], %[[ALLOC_B]]
+// CHECK: memref.copy %[[C]], %[[ALLOC_C]]
// CHECK: %[[CASTED_A:.*]] = memref.cast %[[ALLOC_A]]
// CHECK-NEXT: call @callee(%[[CASTED_A]], %[[CASTED_B]], %[[CASTED_C]])
call @callee(%A, %B, %C) : (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) -> ()
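// A sketch for one operand (hypothetical %A_buf, %d): each
// non-writable argument gets its own clone, and the casts produce the
// layout expected by @callee.
//   %allocA = memref.alloc(%d) : memref<?xf32>
//   memref.copy %A_buf, %allocA : memref<?xf32> to memref<?xf32>
//   %castedA = memref.cast %allocA : memref<?xf32> to memref<?xf32, #map>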
// insert_slice is inplace but its source comes from an equivalent buffer
// that is not in place. So we must insert a copy of the small buffer into
// the bigger buffer.
- // CHECK: linalg.copy(%[[ALLOC]], %[[T]])
+ // CHECK: memref.copy %[[ALLOC]], %[[T]]
%7 = tensor.insert_slice %6 into %arg6[%arg3, %arg5] [8, 16] [1, 1] :
tensor<8x16xf32> into tensor<128x192xf32>
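// Sketch (hypothetical %arg6_buf and %tile_alloc; #map stands for the
// strided result layout): the tile lives in its own allocation, so it
// is copied into the matching subview of the large buffer.
//   %sv = memref.subview %arg6_buf[%arg3, %arg5] [8, 16] [1, 1]
//       : memref<128x192xf32> to memref<8x16xf32, #map>
//   memref.copy %tile_alloc, %sv : memref<8x16xf32> to memref<8x16xf32, #map>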
// CHECK-LABEL: func @tensor_cast_not_in_place(
// CHECK-SAME: %[[A:.*]]: memref<?xf32{{.*}}>, %[[B:.*]]: memref<?xf32{{.*}}>
// CHECK: %[[alloc:.*]] = memref.alloc
-// CHECK: linalg.copy(%[[A]], %[[alloc]])
-// CHECK: %[[cast:.*]] = memref.cast %[[alloc]]
+// CHECK: memref.copy %[[A]], %[[alloc]]
+// CHECK: %[[subview:.*]] = memref.subview %[[A]][{{.*}}] [4] [1] : {{.*}} to memref<4xf32
+// CHECK: memref.copy %[[alloc]], %[[subview]]
func @tensor_cast_not_in_place(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = false}, %idx: index)
%1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
- // CHECK: linalg.copy(%[[arg0]], %[[alloc]])
+ // CHECK: memref.copy %[[arg0]], %[[alloc]]
// CHECK: call @inner_func_2(%[[casted]])
%3 = call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
scf.yield %t1 : tensor<?xf32>
%t3: tensor<?x?xf32> {linalg.inplaceable = false},
%s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
// CHECK: %[[alloc:.*]] = memref.alloc
- // CHECK: linalg.copy(%[[t1]], %[[alloc]])
+ // CHECK: memref.copy %[[t1]], %[[alloc]]
// CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[alloc]] : {{.*}})
%r = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
{
// Make sure that a copy is inserted here.
// CHECK: %[[ALLOC:.*]] = memref.alloc
- // CHECK: linalg.copy(%[[t0]], %[[ALLOC]])
+ // CHECK: memref.copy %[[t0]], %[[ALLOC]]
// CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
%r0 = linalg.generic #trait outs(%t0 : tensor<?xf32>) {
^bb(%0: f32):
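// Sketch of the out-of-place outs operand (hypothetical %t0_buf, %d,
// %cst): %t0 is still live, so the op writes into an initialized
// clone of its buffer.
//   %alloc = memref.alloc(%d) : memref<?xf32>
//   memref.copy %t0_buf, %alloc : memref<?xf32> to memref<?xf32>
//   linalg.generic #trait outs(%alloc : memref<?xf32>) {
//   ^bb0(%out: f32):
//     linalg.yield %cst : f32
//   }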
%cst = arith.constant 0.0 : f32
%idx = arith.constant 0 : index
// CHECK: %[[alloc:.*]] = memref.alloc
- // CHECK: linalg.copy(%[[t1]], %[[alloc]])
+ // CHECK: memref.copy %[[t1]], %[[alloc]]
// CHECK: memref.store %{{.*}}, %[[alloc]]
%w = tensor.insert %cst into %t1[%idx] : tensor<?xf32>
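// Sketch (hypothetical %t1_buf, %d): tensor.insert on a non-writable
// tensor clones the buffer, then lowers to a plain store.
//   %alloc = memref.alloc(%d) : memref<?xf32>
//   memref.copy %t1_buf, %alloc : memref<?xf32> to memref<?xf32>
//   memref.store %cst, %alloc[%idx] : memref<?xf32>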
// CHECK: %[[select:.*]] = select %{{.*}}, %[[t1]], %[[t2]]
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
- // CHECK: linalg.copy(%[[t1]], %[[alloc]])
+ // CHECK: memref.copy %[[t1]], %[[alloc]]
// CHECK: %[[select:.*]] = select %{{.*}}, %[[casted]], %[[t2]]
%s = std.select %c, %t1, %t2 : tensor<?xf32>
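// Sketch (hypothetical %t1_buf, %t2_buf, %d): the copy makes the %t1
// side writable and the cast gives both select operands the same
// memref type.
//   %alloc = memref.alloc(%d) : memref<?xf32>
//   %casted = memref.cast %alloc : memref<?xf32> to memref<?xf32, #map>
//   memref.copy %t1_buf, %alloc : memref<?xf32, #map> to memref<?xf32>
//   %s = select %c, %casted, %t2_buf : memref<?xf32, #map>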