From: Matthias Springer Date: Mon, 27 Jun 2022 14:28:38 +0000 (+0200) Subject: [mlir][bufferize] Infer memory space in all bufferization patterns X-Git-Tag: upstream/15.0.7~3447 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c0b0b6a00a2c54a2c5fdc5626d240351021def5f;p=platform%2Fupstream%2Fllvm.git [mlir][bufferize] Infer memory space in all bufferization patterns This change updates all remaining bufferization patterns (except for scf.while) and the remaining bufferization infrastructure to infer the memory space whenever possible instead of falling back to "0". (If a default memory space is set in the bufferization options, we still fall back to that value if the memory space could not be inferred.) Differential Revision: https://reviews.llvm.org/D128423 --- diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td index 8f739d7..61caa18 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td @@ -355,6 +355,25 @@ def BufferizableOpInterface : OpInterface<"BufferizableOpInterface"> { return bufferization::getMemRefType(tensorType, options); }] >, + InterfaceMethod< + /*desc=*/[{ + Return the memory space of the given tensor OpResult if specified on + this op. If not specified, return `failure`. + + This method will never be called with OpResults that do not bufferize + to a memory allocation. 
+ }], + /*retType=*/"FailureOr<unsigned>", + /*methodName=*/"getMemorySpace", + /*args=*/(ins "OpResult":$opResult), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + assert(cast<BufferizableOpInterface>($_op.getOperation()) + .bufferizesToAllocation(opResult) + && "expected allocation"); + return failure(); + }] + >, ]; let extraClassDeclaration = [{ diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td index 81dadee..df93e0e 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td @@ -47,8 +47,9 @@ def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor", another op. The optional `memory_space` attribute specifies the memory space when - bufferizing this op. If `memory_space` is not specified, the default memory - space is used during bufferization. + bufferizing this op. The memory space is inferred from `copy` if specified. + If neither `copy` nor `memory_space` is specified, the default memory space + is used during bufferization. Both dense and sparse tensor types are supported. 
The result of a `bufferization.alloc_tensor` is a tensor value that can be used like any @@ -81,6 +82,12 @@ def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor", bool bufferizesToAllocation(OpResult opResult) { return true; } + FailureOr<unsigned> getMemorySpace(OpResult opResult) { + if (getMemorySpace().hasValue()) + return static_cast<unsigned>(*getMemorySpace()); + return failure(); + } + bool bufferizesToMemoryRead(OpOperand &opOperand, const AnalysisState &state); diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td index 6cf0dfa..97684fb 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td @@ -347,6 +347,10 @@ def TensorCopyInsertion : Pass<"tensor-copy-insertion"> { "Bufferize function boundaries (experimental).">, Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true", "Specify if new allocations should be deallocated.">, + Option<"mustInferMemorySpace", "must-infer-memory-space", "bool", + /*default=*/"false", + "The memory space of memref types must always be inferred. If " + "unset, a default memory space of 0 is used.">, ]; let constructor = "mlir::bufferization::createTensorCopyInsertionPass()"; } diff --git a/mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp index 24657bf..abfed7d 100644 --- a/mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp @@ -26,6 +26,11 @@ struct ConstantOpInterface const BufferizationOptions &options) const { auto constantOp = cast<arith::ConstantOp>(op); + // TODO: Implement memory space for this op. E.g., by adding a memory_space + // attribute to ConstantOp. 
+ if (options.defaultMemorySpace != static_cast(0)) + return op->emitError("memory space not implemented yet"); + // Only ranked tensors are supported. if (!constantOp.getType().isa()) return failure(); @@ -150,6 +155,10 @@ struct SelectOpInterface return failure(); Value trueBuffer = *maybeTrueBuffer; Value falseBuffer = *maybeFalseBuffer; + BaseMemRefType trueType = trueBuffer.getType().cast(); + BaseMemRefType falseType = falseBuffer.getType().cast(); + if (trueType.getMemorySpaceAsInt() != falseType.getMemorySpaceAsInt()) + return op->emitError("inconsistent memory space on true/false operands"); // The "true" and the "false" operands must have the same type. If the // buffers have different types, they differ only in their layout map. Cast diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 6073c93..374fbd7 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -43,6 +43,13 @@ using namespace bufferization; constexpr const ::llvm::StringLiteral bufferization::BufferizableOpInterface::kInplaceableAttrName; +/// Return the owner of the given value. +static Operation *getOwnerOfValue(Value value) { + if (auto opResult = value.dyn_cast()) + return opResult.getDefiningOp(); + return value.cast().getOwner()->getParentOp(); +} + /// Create an AllocTensorOp for the given shaped value. If `copy` is set, the /// shaped value is copied. Otherwise, a tensor with undefined contents is /// allocated. @@ -84,10 +91,21 @@ FailureOr bufferization::allocateTensorForShapedValue( populateDynamicDimSizes(b, loc, tensor, dynamicSizes); } + // Create AllocTensorOp. auto allocTensorOp = b.create(loc, tensorType, dynamicSizes, copy ? tensor : Value()); allocTensorOp->setAttr(BufferizationDialect::kEscapeAttrName, b.getBoolArrayAttr({escape})); + + // Add 'memory_space' attribute. 
Not needed if 'copy' operand is specified. + if (copy) + return allocTensorOp.getResult(); + FailureOr copyBufferType = getBufferType(tensor, options); + if (failed(copyBufferType)) + return failure(); + allocTensorOp.setMemorySpaceAttr( + b.getIntegerAttr(b.getIntegerType(64, /*isSigned=*/false), + copyBufferType->getMemorySpaceAsInt())); return allocTensorOp.getResult(); } @@ -512,16 +530,43 @@ FailureOr bufferization::getBufferType(Value value, const BufferizationOptions &options) { auto tensorType = value.getType().dyn_cast(); assert(tensorType && "unexpected non-tensor type"); + Operation *op = getOwnerOfValue(value); + // ToTensorOp: Take buffer type directly from the op. if (auto toTensorOp = value.getDefiningOp()) return toTensorOp.getMemref().getType().cast(); + // If value is a bbArg of a bufferizable op: query op interface. if (auto bbArg = value.dyn_cast()) if (auto bufferizableOp = options.dynCastBufferizableOp(bbArg.getOwner()->getParentOp())) return bufferizableOp.getBufferType(bbArg, options); - return getMemRefType(tensorType, options); + // Check value is a new buffer allocation with a memory space attribute. In + // that case we can at least infer the memory space. + Optional memorySpace = None; + if (auto opResult = value.dyn_cast()) { + if (auto bufferizableOp = + options.dynCastBufferizableOp(opResult.getDefiningOp())) { + if (bufferizableOp.bufferizesToAllocation(opResult)) { + FailureOr queriedMemorySpace = + bufferizableOp.getMemorySpace(opResult); + if (!failed(queriedMemorySpace)) + memorySpace = *queriedMemorySpace; + } + } + } + + // If we still do not know the memory space, use the default memory space (if + // any). + if (!memorySpace.hasValue()) + memorySpace = options.defaultMemorySpace; + + // If we still do not know the memory space, report a failure. 
+ if (!memorySpace.hasValue()) + return op->emitError("could not infer memory space"); + + return getMemRefType(tensorType, options, /*layout=*/{}, *memorySpace); } void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter, diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp index 188c08b..679c4e8 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp @@ -176,6 +176,9 @@ LogicalResult AllocTensorOp::bufferize(RewriterBase &rewriter, unsigned memorySpace; if (getMemorySpace().hasValue()) { memorySpace = *getMemorySpace(); + } else if (getCopy()) { + memorySpace = + copyBuffer.getType().cast().getMemorySpaceAsInt(); } else if (options.defaultMemorySpace.hasValue()) { memorySpace = *options.defaultMemorySpace; } else { diff --git a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp index cb320dc..786f12f 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp @@ -105,6 +105,8 @@ struct TensorCopyInsertionPass options.allowReturnAllocs = allowReturnAllocs; options.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries; options.createDeallocs = createDeallocs; + if (mustInferMemorySpace) + options.defaultMemorySpace = None; if (failed(insertTensorCopies(getOperation(), options))) signalPassFailure(); } diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp index 0bf4fd3..3af3e5d 100644 --- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp @@ -707,6 +707,8 @@ struct WhileOpInterface return success(); } + // TODO: Implement getBufferType interface method and infer 
buffer types. + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const BufferizationOptions &options) const { auto whileOp = cast(op); diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp index 6f24843..4d15ace 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -379,6 +379,10 @@ struct FromElementsOpInterface const BufferizationOptions &options) const { auto fromElementsOp = cast(op); + // TODO: Implement memory space for this op. + if (options.defaultMemorySpace != static_cast(0)) + return op->emitError("memory space not implemented yet"); + // Allocate a buffer for the result. Location loc = op->getLoc(); auto tensorType = fromElementsOp.getType().cast(); @@ -435,6 +439,11 @@ struct GenerateOpInterface LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const BufferizationOptions &options) const { auto generateOp = cast(op); + + // TODO: Implement memory space for this op. + if (options.defaultMemorySpace != static_cast(0)) + return op->emitError("memory space not implemented yet"); + auto tensorType = generateOp.getType().cast(); // Allocate memory. 
Location loc = op->getLoc(); @@ -792,7 +801,9 @@ struct ReshapeOpInterface if (failed(srcBuffer) || failed(shapeBuffer)) return failure(); auto resultTensorType = reshapeOp.getResult().getType().cast(); - auto resultMemRefType = getMemRefType(resultTensorType, options); + auto resultMemRefType = getMemRefType( + resultTensorType, options, /*layout=*/{}, + srcBuffer->getType().cast().getMemorySpaceAsInt()); replaceOpWithNewBufferizedOp( rewriter, op, resultMemRefType, *srcBuffer, *shapeBuffer); return success(); diff --git a/mlir/test/Dialect/Arithmetic/one-shot-bufferize-memory-space-invalid.mlir b/mlir/test/Dialect/Arithmetic/one-shot-bufferize-memory-space-invalid.mlir new file mode 100644 index 0000000..315da00 --- /dev/null +++ b/mlir/test/Dialect/Arithmetic/one-shot-bufferize-memory-space-invalid.mlir @@ -0,0 +1,22 @@ +// RUN: mlir-opt %s -one-shot-bufferize="must-infer-memory-space" -split-input-file -verify-diagnostics + +func.func @inconsistent_memory_space_arith_select(%c: i1) -> tensor<10xf32> { + // Selecting tensors with different memory spaces. Such IR cannot be + // bufferized. 
+ %0 = bufferization.alloc_tensor() {memory_space = 0 : ui64} : tensor<10xf32> + %1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32> + // expected-error @+2 {{inconsistent memory space on true/false operands}} + // expected-error @+1 {{failed to bufferize op}} + %r = arith.select %c, %0, %1 : tensor<10xf32> + func.return %r : tensor<10xf32> +} + +// ----- + +func.func @constant_memory_space(%idx: index, %v: i32) -> tensor<3xi32> { + // expected-error @+2 {{memory space not implemented yet}} + // expected-error @+1 {{failed to bufferize op}} + %cst = arith.constant dense<[5, 1000, 20]> : tensor<3xi32> + %0 = tensor.insert %v into %cst[%idx] : tensor<3xi32> + return %0 : tensor<3xi32> +} \ No newline at end of file diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-memory-space-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-memory-space-invalid.mlir index f943654..5feeab0 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-memory-space-invalid.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-memory-space-invalid.mlir @@ -6,3 +6,14 @@ func.func @alloc_tensor_without_memory_space() -> tensor<10xf32> { %0 = bufferization.alloc_tensor() : tensor<10xf32> return %0 : tensor<10xf32> } + +// ----- + +func.func @memory_space_of_unknown_op() -> f32 { + %c0 = arith.constant 0 : index + // expected-error @+1 {{could not infer memory space}} + %t = "test.dummy_op"() : () -> (tensor<10xf32>) + // expected-error @+1 {{failed to bufferize op}} + %s = tensor.extract %t[%c0] : tensor<10xf32> + return %s : f32 +} diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space-invalid.mlir new file mode 100644 index 0000000..ba71090 --- /dev/null +++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space-invalid.mlir @@ 
-0,0 +1,12 @@ +// RUN: mlir-opt %s -tensor-copy-insertion="must-infer-memory-space" -split-input-file -verify-diagnostics + +// An alloc is inserted but the copy is elided. Therefore, the memory space +// should be specified on the alloc_tensor op. +func.func @memory_space_of_unknown_op() -> (tensor<10xf32>, tensor<10xf32>) { + %c0 = arith.constant 0 : index + %cst = arith.constant 0.0 : f32 + // expected-error @+1 {{could not infer memory space}} + %t = bufferization.alloc_tensor() : tensor<10xf32> + %s = tensor.insert %cst into %t[%c0] : tensor<10xf32> + return %s, %t : tensor<10xf32>, tensor<10xf32> +} diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir new file mode 100644 index 0000000..1397ccd --- /dev/null +++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir @@ -0,0 +1,25 @@ +// RUN: mlir-opt %s -tensor-copy-insertion="must-infer-memory-space" -split-input-file | FileCheck %s + +// CHECK-LABEL: func @unknown_op_copy
func.func @unknown_op_copy() -> (tensor<10xf32>, tensor<10xf32>) { + %c0 = arith.constant 0 : index + %cst = arith.constant 0.0 : f32 + // CHECK: %[[dummy:.*]] = "test.dummy_op"() : () -> tensor<10xf32> + %t = "test.dummy_op"() : () -> tensor<10xf32> + // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[dummy]]) {bufferization.escape = [false]} : tensor<10xf32> + %s = tensor.insert %cst into %t[%c0] : tensor<10xf32> + return %s, %t : tensor<10xf32>, tensor<10xf32> +} + +// ----- + +// CHECK-LABEL: func @alloc_tensor_copy +func.func @alloc_tensor_copy() -> (tensor<10xf32>, tensor<10xf32>) { + %c0 = arith.constant 0 : index + %cst = arith.constant 0.0 : f32 + // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 1 : ui64} : tensor<10xf32> + %t = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32> + // CHECK: 
bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 1 : ui64} : tensor<10xf32> + %s = tensor.insert %cst into %t[%c0] : tensor<10xf32> + return %s, %t : tensor<10xf32>, tensor<10xf32> +} diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir index 5bc08c6..b8646edc 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir @@ -40,10 +40,10 @@ func.func @do_not_copy_undefined_tensor(%f: f32, %idx: index) { // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32> // The second alloc_tensor should not have a copy operand. - // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32> + // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<5xf32> // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32> - // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32> + // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true], memory_space = 0 : ui64} : tensor<5xf32> %0 = bufferization.alloc_tensor() : tensor<5xf32> %1 = tensor.insert %f into %0[%idx] : tensor<5xf32> return %0, %1 : tensor<5xf32>, tensor<5xf32> @@ -55,7 +55,7 @@ func.func @do_not_copy_undefined_tensor(%f: f32, %idx: index) func.func @do_not_copy_when_overwritten(%t: tensor<5xf32>, %f: f32) -> (tensor<5xf32>, tensor<5xf32>) { - // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32> + // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<5xf32> // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<5xf32>) %r = linalg.generic { indexing_maps = [affine_map<(d0) -> 
(d0)>], @@ -74,7 +74,7 @@ func.func @do_not_copy_when_result_not_read(%t: tensor<5xf32>, %f: f32) -> (tensor<3xf32>) { %0 = tensor.extract_slice %t[0][3][1] : tensor<5xf32> to tensor<3xf32> - // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<3xf32> + // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<3xf32> // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<3xf32>) %r = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>], diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir new file mode 100644 index 0000000..52338d0 --- /dev/null +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir @@ -0,0 +1,16 @@ +// RUN: mlir-opt %s -one-shot-bufferize -split-input-file -verify-diagnostics + +func.func @inconsistent_memory_space_scf_if(%c: i1) -> tensor<10xf32> { + // Yielding tensors with different memory spaces. Such IR cannot be + // bufferized. 
+ %0 = bufferization.alloc_tensor() {memory_space = 0 : ui64} : tensor<10xf32> + %1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32> + // expected-error @+2 {{inconsistent memory space on then/else branches}} + // expected-error @+1 {{failed to bufferize op}} + %r = scf.if %c -> tensor<10xf32> { + scf.yield %0 : tensor<10xf32> + } else { + scf.yield %1 : tensor<10xf32> + } + func.return %r : tensor<10xf32> +} diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir index 5ea23b8..e2293cc 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -632,3 +632,80 @@ func.func @matmul(%arg0: tensor<8x8xf32>, %arg1: tensor<8x8xf32>, %arg2: tensor< } return %0 : tensor<8x8xf32> } + +// ----- + +// CHECK-LABEL: func @scf_if_memory_space +func.func @scf_if_memory_space(%c: i1, %f: f32) -> (f32, f32) +{ + %c0 = arith.constant 0 : index + // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1> + %0 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<5xf32> + // CHECK: scf.if %{{.*}} -> (memref<5xf32, 1>) { + %1 = scf.if %c -> tensor<5xf32> { + // CHECK: %[[cloned:.*]] = bufferization.clone %[[alloc]] + // CHECK: scf.yield %[[cloned]] + scf.yield %0 : tensor<5xf32> + } else { + // CHECK: %[[alloc2:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1> + // CHECK: memref.store %{{.*}}, %[[alloc2]] + // CHECK: %[[cloned2:.*]] = bufferization.clone %[[alloc2]] + // CHECK: memref.dealloc %[[alloc2]] + // CHECK: scf.yield %[[cloned2]] + %2 = tensor.insert %f into %0[%c0] : tensor<5xf32> + scf.yield %2 : tensor<5xf32> + } + %r0 = tensor.extract %0[%c0] : tensor<5xf32> + %r1 = tensor.extract %1[%c0] : tensor<5xf32> + return %r0, %r1 : f32, f32 +} + +// ----- + +// CHECK-LABEL: func @scf_execute_region_memory_space +// CHECK: memref.alloc() {{.*}} : memref<5xf32, 1> +// CHECK: memref.store +// CHECK: memref.load +// CHECK: memref.dealloc 
+func.func @scf_execute_region_memory_space(%f: f32) -> f32 { + %c0 = arith.constant 0 : index + %0 = scf.execute_region -> tensor<5xf32> { + %1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<5xf32> + %2 = tensor.insert %f into %1[%c0] : tensor<5xf32> + scf.yield %2 : tensor<5xf32> + } + %r = tensor.extract %0[%c0] : tensor<5xf32> + return %r : f32 +} + +// ----- + +// Additional allocs are inserted in the loop body. We just check that all +// allocs have the correct memory space. + +// CHECK-LABEL: func @scf_for_swapping_yields_memory_space +func.func @scf_for_swapping_yields_memory_space( + %sz: index, %C : tensor<4xf32>, %lb : index, %ub : index, %step : index) + -> (f32, f32) +{ + // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref + // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref + %A = bufferization.alloc_tensor(%sz) {memory_space = 1 : ui64} : tensor + %B = bufferization.alloc_tensor(%sz) {memory_space = 1 : ui64} : tensor + + // CHECK: scf.for {{.*}} { + %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) + -> (tensor, tensor) + { + // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref + // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref + %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor + %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor + // Yield tensors in different order. + scf.yield %ttB, %ttA : tensor, tensor + } + // CHECK: } + %f0 = tensor.extract %r0#0[%step] : tensor + %f1 = tensor.extract %r0#1[%step] : tensor + return %f0, %f1: f32, f32 +}