return false;
}
+/// For each given value, find the closest enclosing repetitive region (i.e.,
+/// a region whose ops may be executed multiple times, such as a loop body).
+/// If this is the same region for every value, return it. Otherwise, return
+/// None.
+///
+/// Note: A value that has no enclosing repetitive region at all maps to
+/// nullptr. So if none of the values are inside a repetitive region, this
+/// returns an Optional that contains nullptr (not None).
+static Optional<Region *>
+getCommonEnclosingRepetitiveRegion(ArrayRef<Value> values) {
+ if (values.empty())
+ return None;
+ // Compare every value's region against the first one's.
+ Region *r = getEnclosingRepetitiveRegion(values.front());
+ for (Value value : values.drop_front())
+ if (getEnclosingRepetitiveRegion(value) != r)
+ return None;
+ return r;
+}
+
/// Annotate IR with details about the detected RaW conflict.
static void annotateConflict(OpOperand *uRead, OpOperand *uConflictingWrite,
Value lastWrite) {
AnalysisState &state, const BufferizationAliasInfo &aliasInfo) {
const BufferizationOptions &options = state.getOptions();
+ // Gather all written aliases.
+ SmallVector<Value> writtenAliases;
+ for (OpOperand *uWrite : usesWrite)
+ writtenAliases.push_back(uWrite->get());
+ // Find the inner-most enclosing repetitive region of each alias. If this is
+ // the same region for every alias, save it in `repetitiveRegionOfWrites`.
+ Optional<Region *> repetitiveRegionOfWrites =
+ getCommonEnclosingRepetitiveRegion(writtenAliases);
+
for (OpOperand *uRead : usesRead) {
Operation *readingOp = uRead->getOwner();
// met for uConflictingWrite to be an actual conflict.
Operation *conflictingWritingOp = uConflictingWrite->getOwner();
+ // Check if conflictingWritingOp is in the same repetitive region as all
+ // written aliases. If this is not the case, there is no meaningful
+ // `happensBefore` relationship because conflictingWritingOp may be
+ // executed multiple times. E.g.:
+ //
+ // %0 = ... : tensor<?xf32>
+ // scf.for ... {
+ // "reading_op"(%0) : tensor<?xf32>
+ // %1 = "writing_op"(%0) : tensor<?xf32> -> tensor<?xf32>
+ // ...
+ // }
+ //
+ // In the above example, reading_op happens before writing_op according to
+ // op dominance. However, both ops may happen multiple times; in
+ // particular, the second execution of reading_op happens after the first
+ // execution of writing_op. This is problematic if the tensor they operate
+ // on (%0) is defined outside of the loop.
+ //
+ // Counter example:
+ //
+ // scf.for ... {
+ // %0 = ... : tensor<?xf32>
+ // "reading_op"(%0) : tensor<?xf32>
+ // %1 = "writing_op"(%0) : tensor<?xf32> -> tensor<?xf32>
+ // ...
+ // }
+ //
+ // In this example, %0 is in the same repetitive region as
+ // conflictingWritingOp, so op dominance can be used to compute the
+ // `happensBefore` relationship.
+ //
+ // Note: iter_args of loops are not aliases of their respective block
+ // arguments, so op dominance can be used when analyzing ops that operate
+ // on them.
+ bool canUseOpDominance =
+ repetitiveRegionOfWrites ==
+ getEnclosingRepetitiveRegion(conflictingWritingOp);
+
// No conflict if the readingOp dominates conflictingWritingOp, i.e., the
// write is not visible when reading.
- if (happensBefore(readingOp, conflictingWritingOp, domInfo))
+ //
+ // Note: If ops are executed multiple times (e.g., because they are inside
+ // a loop), there may be no meaningful `happensBefore` relationship.
+ if (canUseOpDominance &&
+ happensBefore(readingOp, conflictingWritingOp, domInfo))
continue;
// No conflict if the reading use equals the use of the conflicting write.
- // A use cannot conflict with itself. Note: Just being the same op is not
- // enough. It has to be the same use.
- if (uConflictingWrite == uRead)
+ // A use cannot conflict with itself.
+ //
+ // Note: Just being the same op is not enough. It has to be the same use.
+ // Note: If the op is executed multiple times (e.g., because it is inside
+ // a loop), it may be conflicting with itself.
+ if (canUseOpDominance && uConflictingWrite == uRead)
continue;
// No conflict if the op interface says so.
continue;
// Ops are not conflicting if they are in mutually exclusive regions.
- if (insideMutuallyExclusiveRegions(readingOp, conflictingWritingOp))
+ //
+ // Note: If ops are executed multiple times (e.g., because they are inside
+ // a loop), mutually exclusive regions may be executed multiple
+ // times.
+ if (canUseOpDominance &&
+ insideMutuallyExclusiveRegions(readingOp, conflictingWritingOp))
continue;
// Check all possible last writes.
return %f, %w : f32, tensor<?xf32>
}
+
+// -----
+
+// CHECK-LABEL: func @write_to_same_tensor_in_loop_out_of_place(
+func @write_to_same_tensor_in_loop_out_of_place(
+ %A : tensor<?xf32> {linalg.inplaceable = true},
+ %B : tensor<?xf32> {linalg.inplaceable = true},
+ %lb : index, %ub : index, %step : index, %sz: index)
+ -> (tensor<?xf32>)
+{
+ // CHECK: scf.for {{.*}} {
+ %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+ %i2 = arith.index_cast %i : index to i32
+ %i3 = arith.sitofp %i2 : i32 to f32
+ // The tensor.insert is out-of-place because %B (defined outside of the
+ // loop) is written to multiple times inside the loop, so op dominance
+ // cannot be used to rule out a read-after-write conflict.
+ // CHECK: tensor.insert
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]}
+ %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
+ %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
+ scf.yield %A2 : tensor<?xf32>
+ }
+ // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
+
+ return %r0 : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @write_to_same_tensor_in_loop_in_place(
+func @write_to_same_tensor_in_loop_in_place(
+ %A : tensor<?xf32> {linalg.inplaceable = true},
+ %lb : index, %ub : index, %step : index, %sz: index)
+ -> (tensor<?xf32>)
+{
+ // CHECK: scf.for {{.*}} {
+ %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+ %B = linalg.init_tensor [%sz] : tensor<?xf32>
+ %i2 = arith.index_cast %i : index to i32
+ %i3 = arith.sitofp %i2 : i32 to f32
+ // The tensor.insert is in-place because %B is defined inside the loop:
+ // each iteration writes to a fresh tensor, so there is no conflict
+ // across loop iterations.
+ // CHECK: tensor.insert
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
+ %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
+ %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
+ scf.yield %A2 : tensor<?xf32>
+ }
+ // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
+
+ return %r0 : tensor<?xf32>
+}