To keep the pass simple, users should apply cleanup passes manually when necessary. In particular, `-cse -canonicalize` are often desirable to fold away self-copies created by the bufferization.
This addresses a comment in D120191.
Differential Revision: https://reviews.llvm.org/D155923
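
For example, the cleanup passes can simply be appended to the pipeline on the command line (a sketch; `input.mlir` and the exact One-Shot Bufferize options are placeholders):

  mlir-opt input.mlir \
    -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" \
    -cse -canonicalize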
bufferization::getBufferType(selectOp.getResult(), options);
if (failed(targetType))
return failure();
- trueBuffer =
- rewriter.create<memref::CastOp>(loc, *targetType, trueBuffer);
- falseBuffer =
- rewriter.create<memref::CastOp>(loc, *targetType, falseBuffer);
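+ // Insert a cast only if the buffer does not already have the target
+ // type; this avoids creating no-op casts that would otherwise have to
+ // be folded away later.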
+ if (trueBuffer.getType() != *targetType)
+ trueBuffer =
+ rewriter.create<memref::CastOp>(loc, *targetType, trueBuffer);
+ if (falseBuffer.getType() != *targetType)
+ falseBuffer =
+ rewriter.create<memref::CastOp>(loc, *targetType, falseBuffer);
}
replaceOpWithNewBufferizedOp<arith::SelectOp>(
this->numBufferDealloc = statistics.numBufferDealloc;
this->numTensorInPlace = statistics.numTensorInPlace;
this->numTensorOutOfPlace = statistics.numTensorOutOfPlace;
-
- if (opt.testAnalysisOnly)
- return;
-
- OpPassManager cleanupPipeline("builtin.module");
- cleanupPipeline.addPass(createCanonicalizerPass());
- cleanupPipeline.addPass(createCSEPass());
- cleanupPipeline.addPass(createLoopInvariantCodeMotionPass());
- (void)runPipeline(cleanupPipeline, moduleOp);
}
private:
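
Downstream users who relied on the pass running these cleanups can rebuild the removed pipeline in their own code; a minimal sketch (the helper name `runCleanups` is illustrative):

  #include "mlir/Pass/PassManager.h"
  #include "mlir/Transforms/Passes.h"

  // Re-apply the cleanups that One-Shot Bufferize no longer runs itself.
  static void runCleanups(mlir::ModuleOp moduleOp) {
    mlir::PassManager pm(moduleOp.getContext());
    pm.addPass(mlir::createCanonicalizerPass());
    pm.addPass(mlir::createCSEPass());
    pm.addPass(mlir::createLoopInvariantCodeMotionPass());
    (void)pm.run(moduleOp);
  }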
-// RUN: mlir-opt %s -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -canonicalize -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -cse -canonicalize -split-input-file | FileCheck %s
// CHECK: func @buffer_forwarding_conflict(
// CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
%c5 = arith.constant 5 : index
// CHECK-NOT: memref.alloc
- // CHECK: %[[subview:.*]] = memref.subview %[[t]][%[[idx]]] [5] [1]
%blank = tensor.empty() : tensor<5xf32>
// CHECK: scf.for %[[iv:.*]] = %{{.*}} to %[[sz]] step %{{.*}} {
%iv_i32 = arith.index_cast %iv : index to i32
%f = arith.sitofp %iv_i32 : i32 to f32
+ // CHECK: %[[subview:.*]] = memref.subview %[[t]][%[[idx]]] [5] [1]
// CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]]
%filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32>
%0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> (tensor<?xf32>)
// The result of an unknown op is not writable. Always generate a copy.
- // CHECK: %[[dim:.*]] = tensor.dim %[[dummy]]
+ // CHECK: %[[dim:.*]] = memref.dim %[[dummy_memref]]
// CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]])
// CHECK: memref.copy %[[dummy_memref]], %[[alloc]]
// CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
%c0 = arith.constant 0 : index
// CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]]
- // CHECK: %[[dim:.*]] = tensor.dim %[[A]]
+ // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]]
// CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]])
// CHECK: memref.copy %[[A_memref]], %[[alloc]]
// CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
// -----
-// CHECK-LABEL: func @from_unranked_to_unranked
+// CHECK-LABEL: func @from_unranked_to_unranked(
+// CHECK-SAME: %[[arg0:.*]]: tensor<*xi32>
func.func @from_unranked_to_unranked(%arg0: tensor<*xi32>) -> tensor<*xi32> {
- // CHECK: return %arg{{.*}} : tensor<*xi32>
+ // CHECK: %[[m:.*]] = bufferization.to_memref %[[arg0]] : memref<*xi32>
+ // CHECK: %[[t:.*]] = bufferization.to_tensor %[[m]]
+ // CHECK: return %[[t]] : tensor<*xi32>
%0 = tensor.cast %arg0 : tensor<*xi32> to tensor<*xi32>
return %0 : tensor<*xi32>
}
// Note: Default is function-boundary-type-conversion=infer-layout-map
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -drop-equivalent-buffer-results -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -buffer-loop-hoisting -drop-equivalent-buffer-results -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -canonicalize -buffer-loop-hoisting -drop-equivalent-buffer-results -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -drop-equivalent-buffer-results -buffer-deallocation -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -cse -canonicalize -drop-equivalent-buffer-results -buffer-deallocation -split-input-file | FileCheck %s
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-NO-DEALLOC-PASS
// Run fuzzer with different seeds.
// CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
// CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]]
- // CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1]
- // CHECK: %[[svB:.*]] = memref.subview %[[B]][0] [4] [1]
-
// CHECK: scf.for {{.*}}
// CHECK-NOT: iter_args
%r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
-> (tensor<?xf32>, tensor<?xf32>)
{
// %ttA bufferizes to a direct copy of %C's buffer into %svA
+ // CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1]
// CHECK: memref.copy %[[C]], %[[svA]]
%ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
// %ttB bufferizes to a direct copy of %C's buffer into %svB
+ // CHECK: %[[svB:.*]] = memref.subview %[[B]][0] [4] [1]
// CHECK: memref.copy %[[C]], %[[svB]]
%ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
- // CHECK: %[[subview:.*]] = memref.subview %[[arg2]][5] [%[[idx]]] [1]
// CHECK: scf.forall (%[[tidx:.*]]) in (%[[idx2]])
%2 = scf.forall (%arg3) in (%idx2) shared_outs(%o = %arg2) -> (tensor<?xf32>) {
+ // CHECK: %[[subview:.*]] = memref.subview %[[arg2]][5] [%[[idx]]] [1]
%6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor<?xf32> to tensor<?xf32>
// CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview]] : memref<?xf32
%8 = linalg.fill ins(%cst : f32) outs(%6 : tensor<?xf32>) -> tensor<?xf32>
- // Self-copy will DCE away later.
- // CHECK: memref.copy %[[subview]], %[[subview]]
+ // CHECK-NOT: memref.copy
// Empty terminator is elided from pretty-printing.
// CHECK-NOT: scf.forall.in_parallel
// CHECK: %[[alloc1:.*]] = memref.alloc
// CHECK: memref.copy %[[arg2]], %[[alloc1]]
- // CHECK: %[[subview1:.*]] = memref.subview %[[alloc1]][5] [%[[idx]]] [1]
// CHECK: scf.forall (%[[tidx:.*]]) in (%[[idx2]])
%2 = scf.forall (%arg3) in (%idx2) shared_outs(%o = %arg2) -> (tensor<?xf32>) {
+ // CHECK: %[[subview1:.*]] = memref.subview %[[alloc1]][5] [%[[idx]]] [1]
%6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor<?xf32> to tensor<?xf32>
// CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview1]] : memref<?xf32
%8 = linalg.fill ins(%cst : f32) outs(%6 : tensor<?xf32>) -> tensor<?xf32>
-
- // Now the copy of the actual insert_slice. (It will fold away.)
- // CHECK: memref.copy %[[subview1]], %[[subview1]]
+ // CHECK-NOT: memref.copy
// Empty terminator is elided from pretty-printing.
// CHECK-NOT: scf.forall.in_parallel
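
For reference, the self-copy emitted for the write-back of the extract_slice/fill/insert_slice sequence has the shape below; `-cse` first deduplicates the two identical subviews and `-canonicalize` then folds the resulting self-copy, which is why the tests now use `CHECK-NOT: memref.copy` (illustrative IR, SSA names are placeholders):

  %subview = memref.subview %alloc[5] [%idx] [1]
      : memref<?xf32> to memref<?xf32, strided<[1], offset: 5>>
  memref.copy %subview, %subview
      : memref<?xf32, strided<[1], offset: 5>> to memref<?xf32, strided<[1], offset: 5>>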
// CHECK-LABEL: func @tensor_cast_not_in_place(
// CHECK-SAME: %[[A:.*]]: memref<?xf32{{.*}}>, %[[B:.*]]: memref<?xf32{{.*}}>
+// CHECK: %[[casted:.*]] = memref.cast %[[A]] : memref<?xf32, strided<[?], offset: ?>> to memref<4xf32, strided<[?], offset: ?>>
// CHECK: %[[alloc:.*]] = memref.alloc
-// CHECK: memref.copy %[[A]], %[[alloc]]
+// CHECK: memref.copy %[[casted]], %[[alloc]]
// CHECK: %[[subview:.*]] = memref.subview %[[A]][{{.*}}] [4] [1] : {{.*}} to memref<4xf32
// CHECK: memref.copy %[[alloc]], %[[subview]]
func.func @tensor_cast_not_in_place(
vector.print %vec : vector<10xf32>
// Write back a different value (not %1).
- // CHECK: memref.copy %[[b]], %[[t]]
+ // CHECK: %[[subview:.*]] = memref.subview %[[t]][0] [10] [1]
+ // CHECK: memref.copy %[[b]], %[[subview]]
%2 = tensor.insert_slice %b into %t[0][10][1] : tensor<10xf32> into tensor<10xf32>
return %2 : tensor<10xf32>
}
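
The write-back now goes through the full-size subview materialized for the insert_slice instead of copying into %t directly; previously the pass's built-in canonicalization folded this trivial subview away. A sketch of the emitted IR (types assume a static memref<10xf32> buffer):

  %subview = memref.subview %t[0] [10] [1]
      : memref<10xf32> to memref<10xf32, strided<[1]>>
  memref.copy %b, %subview
      : memref<10xf32> to memref<10xf32, strided<[1]>>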