This change updates all remaining bufferization patterns (except for scf.while) and the remaining bufferization infrastructure to infer the memory space whenever possible instead of falling back to "0". (If a default memory space is set in the bufferization options, we still fall back to that value if the memory space could not be inferred.)
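For example (a minimal sketch of the intended behavior; exact layout maps in the printed memref types may differ), an allocation such as

  %t = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<5xf32>

now bufferizes to a buffer in the specified memory space,

  %m = memref.alloc() : memref<5xf32, 1>

and ops that consume the tensor (e.g., tensor.insert or scf.if yields) propagate that memory space into their buffer types instead of defaulting to memory space 0.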
Differential Revision: https://reviews.llvm.org/D128423
return bufferization::getMemRefType(tensorType, options);
}]
>,
+ InterfaceMethod<
+ /*desc=*/[{
+ Return the memory space of the given tensor OpResult if specified on
+ this op. If not specified, return `failure`.
+
+ This method will never be called with OpResults that do not bufferize
+ to a memory allocation.
+ }],
+ /*retType=*/"FailureOr<unsigned>",
+ /*methodName=*/"getMemorySpace",
+ /*args=*/(ins "OpResult":$opResult),
+ /*methodBody=*/"",
+ /*defaultImplementation=*/[{
+ assert(cast<BufferizableOpInterface>($_op.getOperation())
+ .bufferizesToAllocation(opResult)
+ && "expected allocation");
+ return failure();
+ }]
+ >,
];
let extraClassDeclaration = [{
another op.
The optional `memory_space` attribute specifies the memory space when
- bufferizing this op. If `memory_space` is not specified, the default memory
- space is used during bufferization.
+ bufferizing this op. The memory space is inferred from `copy` if specified.
+ If neither `copy` nor `memory_space` is specified, the default memory space
+ is used during bufferization.
Both dense and sparse tensor types are supported. The result of a
`bufferization.alloc_tensor` is a tensor value that can be used like any
bool bufferizesToAllocation(OpResult opResult) { return true; }
+ FailureOr<unsigned> getMemorySpace(OpResult opResult) {
+ if (getMemorySpace().hasValue())
+ return static_cast<unsigned>(*getMemorySpace());
+ return failure();
+ }
+
bool bufferizesToMemoryRead(OpOperand &opOperand,
const AnalysisState &state);
"Bufferize function boundaries (experimental).">,
Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true",
"Specify if new allocations should be deallocated.">,
+ Option<"mustInferMemorySpace", "must-infer-memory-space", "bool",
+ /*default=*/"false",
+ "The memory space of an memref types must always be inferred. If "
+ "unset, a default memory space of 0 is used otherwise.">,
];
let constructor = "mlir::bufferization::createTensorCopyInsertionPass()";
}
const BufferizationOptions &options) const {
auto constantOp = cast<arith::ConstantOp>(op);
+ // TODO: Implement memory space for this op. E.g., by adding a memory_space
+ // attribute to ConstantOp.
+ if (options.defaultMemorySpace != static_cast<unsigned>(0))
+ return op->emitError("memory space not implemented yet");
+
// Only ranked tensors are supported.
if (!constantOp.getType().isa<RankedTensorType>())
return failure();
return failure();
Value trueBuffer = *maybeTrueBuffer;
Value falseBuffer = *maybeFalseBuffer;
+ BaseMemRefType trueType = trueBuffer.getType().cast<BaseMemRefType>();
+ BaseMemRefType falseType = falseBuffer.getType().cast<BaseMemRefType>();
+ if (trueType.getMemorySpaceAsInt() != falseType.getMemorySpaceAsInt())
+ return op->emitError("inconsistent memory space on true/false operands");
// The "true" and the "false" operands must have the same type. If the
// buffers have different types, they differ only in their layout map. Cast
constexpr const ::llvm::StringLiteral
bufferization::BufferizableOpInterface::kInplaceableAttrName;
+/// Return the owner of the given value.
+static Operation *getOwnerOfValue(Value value) {
+ if (auto opResult = value.dyn_cast<OpResult>())
+ return opResult.getDefiningOp();
+ return value.cast<BlockArgument>().getOwner()->getParentOp();
+}
+
/// Create an AllocTensorOp for the given shaped value. If `copy` is set, the
/// shaped value is copied. Otherwise, a tensor with undefined contents is
/// allocated.
populateDynamicDimSizes(b, loc, tensor, dynamicSizes);
}
+ // Create AllocTensorOp.
auto allocTensorOp = b.create<AllocTensorOp>(loc, tensorType, dynamicSizes,
copy ? tensor : Value());
allocTensorOp->setAttr(BufferizationDialect::kEscapeAttrName,
b.getBoolArrayAttr({escape}));
+
+ // Add 'memory_space' attribute. Not needed if 'copy' operand is specified.
+ if (copy)
+ return allocTensorOp.getResult();
+ FailureOr<BaseMemRefType> copyBufferType = getBufferType(tensor, options);
+ if (failed(copyBufferType))
+ return failure();
+ allocTensorOp.setMemorySpaceAttr(
+ b.getIntegerAttr(b.getIntegerType(64, /*isSigned=*/false),
+ copyBufferType->getMemorySpaceAsInt()));
return allocTensorOp.getResult();
}
bufferization::getBufferType(Value value, const BufferizationOptions &options) {
auto tensorType = value.getType().dyn_cast<TensorType>();
assert(tensorType && "unexpected non-tensor type");
+ Operation *op = getOwnerOfValue(value);
+ // ToTensorOp: Take buffer type directly from the op.
if (auto toTensorOp = value.getDefiningOp<bufferization::ToTensorOp>())
return toTensorOp.getMemref().getType().cast<BaseMemRefType>();
+ // If value is a bbArg of a bufferizable op: query op interface.
if (auto bbArg = value.dyn_cast<BlockArgument>())
if (auto bufferizableOp =
options.dynCastBufferizableOp(bbArg.getOwner()->getParentOp()))
return bufferizableOp.getBufferType(bbArg, options);
- return getMemRefType(tensorType, options);
+ // Check if the value is a new buffer allocation with a memory space attribute.
+ // In that case, we can at least infer the memory space.
+ Optional<unsigned> memorySpace = None;
+ if (auto opResult = value.dyn_cast<OpResult>()) {
+ if (auto bufferizableOp =
+ options.dynCastBufferizableOp(opResult.getDefiningOp())) {
+ if (bufferizableOp.bufferizesToAllocation(opResult)) {
+ FailureOr<unsigned> queriedMemorySpace =
+ bufferizableOp.getMemorySpace(opResult);
+ if (!failed(queriedMemorySpace))
+ memorySpace = *queriedMemorySpace;
+ }
+ }
+ }
+
+ // If we still do not know the memory space, use the default memory space (if
+ // any).
+ if (!memorySpace.hasValue())
+ memorySpace = options.defaultMemorySpace;
+
+ // If we still do not know the memory space, report a failure.
+ if (!memorySpace.hasValue())
+ return op->emitError("could not infer memory space");
+
+ return getMemRefType(tensorType, options, /*layout=*/{}, *memorySpace);
}
void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter,
unsigned memorySpace;
if (getMemorySpace().hasValue()) {
memorySpace = *getMemorySpace();
+ } else if (getCopy()) {
+ memorySpace =
+ copyBuffer.getType().cast<BaseMemRefType>().getMemorySpaceAsInt();
} else if (options.defaultMemorySpace.hasValue()) {
memorySpace = *options.defaultMemorySpace;
} else {
options.allowReturnAllocs = allowReturnAllocs;
options.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries;
options.createDeallocs = createDeallocs;
+ if (mustInferMemorySpace)
+ options.defaultMemorySpace = None;
if (failed(insertTensorCopies(getOperation(), options)))
signalPassFailure();
}
return success();
}
+ // TODO: Implement getBufferType interface method and infer buffer types.
+
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationOptions &options) const {
auto whileOp = cast<scf::WhileOp>(op);
const BufferizationOptions &options) const {
auto fromElementsOp = cast<tensor::FromElementsOp>(op);
+ // TODO: Implement memory space for this op.
+ if (options.defaultMemorySpace != static_cast<unsigned>(0))
+ return op->emitError("memory space not implemented yet");
+
// Allocate a buffer for the result.
Location loc = op->getLoc();
auto tensorType = fromElementsOp.getType().cast<RankedTensorType>();
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationOptions &options) const {
auto generateOp = cast<tensor::GenerateOp>(op);
+
+ // TODO: Implement memory space for this op.
+ if (options.defaultMemorySpace != static_cast<unsigned>(0))
+ return op->emitError("memory space not implemented yet");
+
auto tensorType = generateOp.getType().cast<RankedTensorType>();
// Allocate memory.
Location loc = op->getLoc();
if (failed(srcBuffer) || failed(shapeBuffer))
return failure();
auto resultTensorType = reshapeOp.getResult().getType().cast<TensorType>();
- auto resultMemRefType = getMemRefType(resultTensorType, options);
+ auto resultMemRefType = getMemRefType(
+ resultTensorType, options, /*layout=*/{},
+ srcBuffer->getType().cast<BaseMemRefType>().getMemorySpaceAsInt());
replaceOpWithNewBufferizedOp<memref::ReshapeOp>(
rewriter, op, resultMemRefType, *srcBuffer, *shapeBuffer);
return success();
--- /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="must-infer-memory-space" -split-input-file -verify-diagnostics
+
+func.func @inconsistent_memory_space_arith_select(%c: i1) -> tensor<10xf32> {
+ // Selecting tensors with different memory spaces. Such IR cannot be
+ // bufferized.
+ %0 = bufferization.alloc_tensor() {memory_space = 0 : ui64} : tensor<10xf32>
+ %1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
+ // expected-error @+2 {{inconsistent memory space on true/false operands}}
+ // expected-error @+1 {{failed to bufferize op}}
+ %r = arith.select %c, %0, %1 : tensor<10xf32>
+ func.return %r : tensor<10xf32>
+}
+
+// -----
+
+func.func @constant_memory_space(%idx: index, %v: i32) -> tensor<3xi32> {
+ // expected-error @+2 {{memory space not implemented yet}}
+ // expected-error @+1 {{failed to bufferize op}}
+ %cst = arith.constant dense<[5, 1000, 20]> : tensor<3xi32>
+ %0 = tensor.insert %v into %cst[%idx] : tensor<3xi32>
+ return %0 : tensor<3xi32>
+}
\ No newline at end of file
%0 = bufferization.alloc_tensor() : tensor<10xf32>
return %0 : tensor<10xf32>
}
+
+// -----
+
+func.func @memory_space_of_unknown_op() -> f32 {
+ %c0 = arith.constant 0 : index
+ // expected-error @+1 {{could not infer memory space}}
+ %t = "test.dummy_op"() : () -> (tensor<10xf32>)
+ // expected-error @+1 {{failed to bufferize op}}
+ %s = tensor.extract %t[%c0] : tensor<10xf32>
+ return %s : f32
+}
--- /dev/null
+// RUN: mlir-opt %s -tensor-copy-insertion="must-infer-memory-space" -split-input-file -verify-diagnostics
+
+// An alloc is inserted but the copy is elided. Therefore, the memory space
+// should be specified on the alloc_tensor op.
+func.func @memory_space_of_unknown_op() -> (tensor<10xf32>, tensor<10xf32>) {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f32
+ // expected-error @+1 {{could not infer memory space}}
+ %t = bufferization.alloc_tensor() : tensor<10xf32>
+ %s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
+ return %s, %t : tensor<10xf32>, tensor<10xf32>
+}
--- /dev/null
+// RUN: mlir-opt %s -tensor-copy-insertion="must-infer-memory-space" -split-input-file | FileCheck %s
+
+// CHECK-LABEL: func @unknown_op_copy
+func.func @unknown_op_copy() -> (tensor<10xf32>, tensor<10xf32>) {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f32
+ // CHECK: %[[dummy:.*]] = "test.dummy_op"() : () -> tensor<10xf32>
+ %t = "test.dummy_op"() : () -> tensor<10xf32>
+ // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[dummy]]) {bufferization.escape = [false]} : tensor<10xf32>
+ %s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
+ return %s, %t : tensor<10xf32>, tensor<10xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @alloc_tensor_copy
+func.func @alloc_tensor_copy() -> (tensor<10xf32>, tensor<10xf32>) {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f32
+ // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 1 : ui64} : tensor<10xf32>
+ %t = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
+ // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 1 : ui64} : tensor<10xf32>
+ %s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
+ return %s, %t : tensor<10xf32>, tensor<10xf32>
+}
{
// CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
// The second alloc_tensor should not have a copy operand.
- // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
+ // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<5xf32>
// CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32>
- // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32>
+ // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true], memory_space = 0 : ui64} : tensor<5xf32>
%0 = bufferization.alloc_tensor() : tensor<5xf32>
%1 = tensor.insert %f into %0[%idx] : tensor<5xf32>
return %0, %1 : tensor<5xf32>, tensor<5xf32>
func.func @do_not_copy_when_overwritten(%t: tensor<5xf32>, %f: f32)
-> (tensor<5xf32>, tensor<5xf32>)
{
- // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
+ // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<5xf32>
// CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<5xf32>)
%r = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>],
-> (tensor<3xf32>)
{
%0 = tensor.extract_slice %t[0][3][1] : tensor<5xf32> to tensor<3xf32>
- // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<3xf32>
+ // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<3xf32>
// CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<3xf32>)
%r = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>],
--- /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize -split-input-file -verify-diagnostics
+
+func.func @inconsistent_memory_space_scf_if(%c: i1) -> tensor<10xf32> {
+ // Yielding tensors with different memory spaces. Such IR cannot be
+ // bufferized.
+ %0 = bufferization.alloc_tensor() {memory_space = 0 : ui64} : tensor<10xf32>
+ %1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
+ // expected-error @+2 {{inconsistent memory space on then/else branches}}
+ // expected-error @+1 {{failed to bufferize op}}
+ %r = scf.if %c -> tensor<10xf32> {
+ scf.yield %0 : tensor<10xf32>
+ } else {
+ scf.yield %1 : tensor<10xf32>
+ }
+ func.return %r : tensor<10xf32>
+}
}
return %0 : tensor<8x8xf32>
}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_memory_space
+func.func @scf_if_memory_space(%c: i1, %f: f32) -> (f32, f32)
+{
+ %c0 = arith.constant 0 : index
+ // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1>
+ %0 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<5xf32>
+ // CHECK: scf.if %{{.*}} -> (memref<5xf32, 1>) {
+ %1 = scf.if %c -> tensor<5xf32> {
+ // CHECK: %[[cloned:.*]] = bufferization.clone %[[alloc]]
+ // CHECK: scf.yield %[[cloned]]
+ scf.yield %0 : tensor<5xf32>
+ } else {
+ // CHECK: %[[alloc2:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1>
+ // CHECK: memref.store %{{.*}}, %[[alloc2]]
+ // CHECK: %[[cloned2:.*]] = bufferization.clone %[[alloc2]]
+ // CHECK: memref.dealloc %[[alloc2]]
+ // CHECK: scf.yield %[[cloned2]]
+ %2 = tensor.insert %f into %0[%c0] : tensor<5xf32>
+ scf.yield %2 : tensor<5xf32>
+ }
+ %r0 = tensor.extract %0[%c0] : tensor<5xf32>
+ %r1 = tensor.extract %1[%c0] : tensor<5xf32>
+ return %r0, %r1 : f32, f32
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_execute_region_memory_space
+// CHECK: memref.alloc() {{.*}} : memref<5xf32, 1>
+// CHECK: memref.store
+// CHECK: memref.load
+// CHECK: memref.dealloc
+func.func @scf_execute_region_memory_space(%f: f32) -> f32 {
+ %c0 = arith.constant 0 : index
+ %0 = scf.execute_region -> tensor<5xf32> {
+ %1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<5xf32>
+ %2 = tensor.insert %f into %1[%c0] : tensor<5xf32>
+ scf.yield %2 : tensor<5xf32>
+ }
+ %r = tensor.extract %0[%c0] : tensor<5xf32>
+ return %r : f32
+}
+
+// -----
+
+// Additional allocs are inserted in the loop body. We just check that all
+// allocs have the correct memory space.
+
+// CHECK-LABEL: func @scf_for_swapping_yields_memory_space
+func.func @scf_for_swapping_yields_memory_space(
+ %sz: index, %C : tensor<4xf32>, %lb : index, %ub : index, %step : index)
+ -> (f32, f32)
+{
+ // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref<?xf32, 1>
+ // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref<?xf32, 1>
+ %A = bufferization.alloc_tensor(%sz) {memory_space = 1 : ui64} : tensor<?xf32>
+ %B = bufferization.alloc_tensor(%sz) {memory_space = 1 : ui64} : tensor<?xf32>
+
+ // CHECK: scf.for {{.*}} {
+ %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
+ -> (tensor<?xf32>, tensor<?xf32>)
+ {
+ // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref<?xf32, 1>
+ // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref<?xf32, 1>
+ %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
+ %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
+ // Yield tensors in different order.
+ scf.yield %ttB, %ttA : tensor<?xf32>, tensor<?xf32>
+ }
+ // CHECK: }
+ %f0 = tensor.extract %r0#0[%step] : tensor<?xf32>
+ %f1 = tensor.extract %r0#1[%step] : tensor<?xf32>
+ return %f0, %f1: f32, f32
+}