[mlir][linalg] Fix incorrect bound calculation for tiling conv

author Lei Zhang <antiagainst@google.com>

Thu, 30 Sep 2021 17:50:44 +0000 (13:50 -0400)

committer Lei Zhang <antiagainst@google.com>

Thu, 30 Sep 2021 17:50:57 +0000 (13:50 -0400)
author Lei Zhang <antiagainst@google.com>
Thu, 30 Sep 2021 17:50:44 +0000 (13:50 -0400)
committer Lei Zhang <antiagainst@google.com>
Thu, 30 Sep 2021 17:50:57 +0000 (13:50 -0400)
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp

index b7a2bec..7caef6b 100644 (file)
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -637,14 +637,33 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
        LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
                                << ", size: " << size
                                << ": make sure in bound with affine.min\n");
+
        AffineExpr dim0, dim1, dim2;
        bindDims(builder.getContext(), dim0, dim1, dim2);
-      // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
-      AffineMap minMap =
-          AffineMap::inferFromExprList(
-              ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
+
+      // Get the dimension size for this dimension. We need to first calculate
+      // the max index and then plus one. This is important because for
+      // convolution ops, we have its input window dimension's affine map of the
+      // form `(d0 * s0 + d1)`, where `d0`/`d1 is an output/filter window
+      // dimension and `s0` is stride. Directly use the dimension size of
+      // output/filer window dimensions will cause incorrect calculation.
+      AffineMap minusOneMap =
+          AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 - 1}})
                .front();
-      Value d = applyMapToValues(builder, loc, m, ubs).front();
+      AffineMap plusOneMap =
+          AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 + 1}})
+              .front();
+      auto maxIndices = llvm::to_vector<8>(llvm::map_range(ubs, [&](Value ub) {
+        return makeComposedAffineApply(builder, loc, minusOneMap, {ub})
+            .getResult();
+      }));
+      Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front();
+      Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex});
+
+      // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
+      AffineMap minMap = AffineMap::inferFromExprList(
+                             {ArrayRef<AffineExpr>{dim0, dim1 - dim2}})
+                             .front();
        SmallVector<Value, 4> operands{size, d, offset};
        fullyComposeAffineMapAndOperands(&minMap, &operands);
        canonicalizeMapAndOperands(&minMap, &operands);
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir

index 8b43013..f53e170 100644 (file)
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
@@ -233,7 +233,7 @@ builtin.func @fuse_indexed(%arg0: tensor<24x12xi32>,
  // -----
  
  //  CHECK-DAG:  #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
-//  CHECK-DAG:  #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 18)>
+//  CHECK-DAG:  #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 17)>
  //  CHECK-DAG:  #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0, -d1 - d2 + 18)>
  #map0 = affine_map<(d0, d1) -> (d0, d0 + d1)>
  #map1 = affine_map<(d0, d1) -> (d0, d1)>
@@ -245,13 +245,13 @@ func @fuse_non_rectangular(%arg0: tensor<10x18xf32>,
    %cst = constant 0.000000e+00 : f32
    %0 = linalg.fill(%cst, %arg0) : f32, tensor<10x18xf32> -> tensor<10x18xf32>
  
-  //      CHECK:  scf.for %[[IV0:[0-9a-zA-Z]*]] =
-  //      CHECK:    scf.for %[[IV1:[0-9a-zA-Z]*]] =
+  //      CHECK:  scf.for %[[IV0:[0-9a-zA-Z]*]] = %c0 to %c8 step %c4
+  //      CHECK:    scf.for %[[IV1:[0-9a-zA-Z]*]] = %c0 to %c10 step %c5
  
    // Compute producer on a hyper rectangular bounding box. Along the second dimenson,
-  // the offset is set to the sum of the induction variables and the upper bound
-  // to either eight (sum of the tile sizes) or eighteen (sum of the domain sizes)
-  // minus the induction variables.
+  // the offset is set to the sum of the induction variables, and the upper bound
+  // to either 8 (tile size) or 17 (sum of max indices (9+7) then + 1) minus the
+  // induction variables.
    //      CHECK:      %[[SUM:.*]] = affine.apply #[[MAP0]](%[[IV1]], %[[IV0]]
    //      CHECK:      %[[TS1:.*]] = affine.min #[[MAP1]](%[[IV1]], %[[IV0]]
    //      CHECK:      %[[UB1:.*]] = affine.min #[[MAP2]](%[[TS1]], %[[IV1]], %[[IV0]]
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir

index 4aef50e..324da10 100644 (file)
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -203,7 +203,7 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
  // CHECK: #[[BOUND8_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 8, -d0 + s1)>
  // CHECK: #[[BOUND16_MAP:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)>
  // CHECK: #[[X2_MAP:.+]] = affine_map<(d0) -> (d0 * 2)>
-// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2)>
+// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2 - 2)>
  // CHECK: #[[BOUND16_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 16, -d0 + s1)>
  // CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
  // CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir

index eb4124f..b4ee26a 100644 (file)
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -1,7 +1,7 @@
  // RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004
  
  // TILE-23004-DAG: #[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)>
-// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30)>
+// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30 - 39)>
  // TILE-23004-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
  // TILE-23004-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
  // TILE-23004-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
diff --git a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir

index b25ad22..9d3e0a5 100644 (file)
--- a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir
@@ -1,8 +1,8 @@
  // RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" | FileCheck %s
  
  //  CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
-//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1)>
-//  CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1)>
+//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1 - 1)>
+//  CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1 - 1)>
  //  CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
  //  CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
author	Lei Zhang <antiagainst@google.com>
	Thu, 30 Sep 2021 17:50:44 +0000 (13:50 -0400)
committer	Lei Zhang <antiagainst@google.com>
	Thu, 30 Sep 2021 17:50:57 +0000 (13:50 -0400)
mlir/lib/Dialect/Linalg/Utils/Utils.cpp		patch \| blob \| history
mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir		patch \| blob \| history
mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir		patch \| blob \| history
mlir/test/Dialect/Linalg/tile-conv.mlir		patch \| blob \| history
mlir/test/Dialect/Linalg/tile-simple-conv.mlir		patch \| blob \| history