return r;
}
+/// Return `true` if the given tensor value is a memory write. Most values are
+/// tensor writes, but ops that define a tensor SSA value without specifying its
+/// contents (e.g., init_tensor) are not.
+static bool isMemoryWrite(Value value, const AnalysisState &state) {
+ // Values that are not op results (e.g., block arguments) are conservatively
+ // treated as memory writes.
+ auto opResult = value.dyn_cast<OpResult>();
+ if (!opResult)
+ return true;
+ // Ops without registered bufferization semantics are also conservatively
+ // treated as memory writes.
+ auto bufferizableOp = state.getOptions().dynCastBufferizableOp(value);
+ if (!bufferizableOp)
+ return true;
+ // Otherwise, defer to the defining op's own notion of whether this result
+ // is a memory write.
+ return bufferizableOp.isMemoryWrite(opResult, state);
+}
+
/// Annotate IR with details about the detected RaW conflict.
static void annotateConflict(OpOperand *uRead, OpOperand *uConflictingWrite,
Value lastWrite) {
AnalysisState &state, const BufferizationAliasInfo &aliasInfo) {
const BufferizationOptions &options = state.getOptions();
- // Gather all written aliases.
+ // Gather all written aliases. Skip over aliases that are not actual writes.
SmallVector<Value> writtenAliases;
for (OpOperand *uWrite : usesWrite)
- writtenAliases.push_back(uWrite->get());
+ if (isMemoryWrite(uWrite->get(), state))
+ writtenAliases.push_back(uWrite->get());
// Find the inner-most enclosing repetitive region of each alias. If this is
// the same region for every alias, save it in `repetitiveRegionOfWrites`.
Optional<Region *> repetitiveRegionOfWrites =
// Note: iter_args of loops are not aliases of their respective block
 // arguments, so op dominance can be used when analyzing ops that operate
// on them.
+ //
+ // Note: If `writtenAliases` is empty, there are no memory writes outside
+ // of the repetitive region of conflictingWritingOp, which means that all
+ // relevant aliases are inside the same repetitive region.
bool canUseOpDominance =
+ writtenAliases.empty() ||
repetitiveRegionOfWrites ==
- getEnclosingRepetitiveRegion(conflictingWritingOp);
+ getEnclosingRepetitiveRegion(conflictingWritingOp);
// No conflict if the readingOp dominates conflictingWritingOp, i.e., the
// write is not visible when reading.
return %r0 : tensor<?xf32>
}
+
+// -----
+
+// Test case: %B is written to repeatedly inside the loop, but its only
+// "definition" is an init_tensor (no specified contents), so no RaW conflict
+// arises and the writes can bufferize in-place.
+// CHECK-LABEL: func @write_to_same_init_tensor_in_place(
+func.func @write_to_same_init_tensor_in_place(
+ %A : tensor<?xf32> {linalg.inplaceable = true},
+ %lb : index, %ub : index, %step : index, %sz: index, %sz2: index)
+ -> (tensor<?xf32>)
+{
+ %B = linalg.init_tensor [%sz2] : tensor<?xf32>
+
+ // CHECK: scf.for {{.*}} {
+ %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+ %i2 = arith.index_cast %i : index to i32
+ %i3 = arith.sitofp %i2 : i32 to f32
+ // %B is written multiple times inside a loop, but it is an init_tensor.
+ // CHECK: tensor.insert
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
+ %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
+ %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
+ scf.yield %A2 : tensor<?xf32>
+ }
+ // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
+
+ return %r0 : tensor<?xf32>
+}
+
+
+// -----
+
+// Test case: like the previous test, but %C (an actual memory write via
+// tensor.insert) aliases the loop-internal write target from outside the
+// loop, so the repeated write must bufferize out-of-place ("false").
+// CHECK-LABEL: func @write_to_same_init_tensor_out_of_place(
+func.func @write_to_same_init_tensor_out_of_place(
+ %A : tensor<?xf32> {linalg.inplaceable = true},
+ %lb : index, %ub : index, %step : index, %sz: index, %sz2: index, %f: f32)
+ -> (tensor<?xf32>)
+{
+ %B = linalg.init_tensor [%sz2] : tensor<?xf32>
+ %C = tensor.insert %f into %B[%lb] : tensor<?xf32>
+
+ // CHECK: scf.for {{.*}} {
+ %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+ %i2 = arith.index_cast %i : index to i32
+ %i3 = arith.sitofp %i2 : i32 to f32
+ // %C is written multiple times inside a loop. Even though %C aliases with
+ // an init_tensor, out-of-place bufferization is necessary because there is
+ // another alias (%C) outside of the loop.
+ // CHECK: tensor.insert
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]}
+ %B2 = tensor.insert %i3 into %C[%i] : tensor<?xf32>
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
+ %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
+ scf.yield %A2 : tensor<?xf32>
+ }
+ // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
+
+ return %r0 : tensor<?xf32>
+}