LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
<< ", size: " << size
<< ": make sure in bound with affine.min\n");
+
AffineExpr dim0, dim1, dim2;
bindDims(builder.getContext(), dim0, dim1, dim2);
- // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
- AffineMap minMap =
- AffineMap::inferFromExprList(
- ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
+
+ // Get the dimension size for this dimension. We need to first calculate
+ // the max index and then add one. This is important because for
+ // convolution ops, the input window dimension's affine map has the
+ // form `(d0 * s0 + d1)`, where `d0`/`d1` is an output/filter window
+ // dimension and `s0` is the stride. Directly using the dimension size of
+ // the output/filter window dimensions would cause an incorrect calculation.
+ AffineMap minusOneMap =
+ AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 - 1}})
.front();
- Value d = applyMapToValues(builder, loc, m, ubs).front();
+ AffineMap plusOneMap =
+ AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 + 1}})
+ .front();
+ auto maxIndices = llvm::to_vector<8>(llvm::map_range(ubs, [&](Value ub) {
+ return makeComposedAffineApply(builder, loc, minusOneMap, {ub})
+ .getResult();
+ }));
+ Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front();
+ Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex});
+
+ // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
+ AffineMap minMap = AffineMap::inferFromExprList(
+ {ArrayRef<AffineExpr>{dim0, dim1 - dim2}})
+ .front();
SmallVector<Value, 4> operands{size, d, offset};
fullyComposeAffineMapAndOperands(&minMap, &operands);
canonicalizeMapAndOperands(&minMap, &operands);
// -----
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 18)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 17)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0, -d1 - d2 + 18)>
#map0 = affine_map<(d0, d1) -> (d0, d0 + d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
%cst = constant 0.000000e+00 : f32
%0 = linalg.fill(%cst, %arg0) : f32, tensor<10x18xf32> -> tensor<10x18xf32>
- // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
- // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
+ // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] = %c0 to %c8 step %c4
+ // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] = %c0 to %c10 step %c5
// Compute producer on a hyper rectangular bounding box. Along the second dimension,
- // the offset is set to the sum of the induction variables and the upper bound
- // to either eight (sum of the tile sizes) or eighteen (sum of the domain sizes)
- // minus the induction variables.
+ // the offset is set to the sum of the induction variables, and the upper bound
+ // to either 8 (tile size) or 17 (the sum of the max indices, 9 + 7, plus 1)
+ // minus the induction variables.
// CHECK: %[[SUM:.*]] = affine.apply #[[MAP0]](%[[IV1]], %[[IV0]]
// CHECK: %[[TS1:.*]] = affine.min #[[MAP1]](%[[IV1]], %[[IV0]]
// CHECK: %[[UB1:.*]] = affine.min #[[MAP2]](%[[TS1]], %[[IV1]], %[[IV0]]
// CHECK: #[[BOUND8_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 8, -d0 + s1)>
// CHECK: #[[BOUND16_MAP:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)>
// CHECK: #[[X2_MAP:.+]] = affine_map<(d0) -> (d0 * 2)>
-// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2)>
+// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2 - 2)>
// CHECK: #[[BOUND16_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 16, -d0 + s1)>
// CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
// CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004
// TILE-23004-DAG: #[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)>
-// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30)>
+// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30 - 39)>
// TILE-23004-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
// TILE-23004-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
// TILE-23004-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" | FileCheck %s
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1 - 1)>
+// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1 - 1)>
// CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
// CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>