From 0412bf6f09892557218b035039e0ab1e567cc1eb Mon Sep 17 00:00:00 2001
From: Andy Davis
Date: Thu, 9 May 2019 08:36:02 -0700
Subject: [PATCH] Add memref dimension bounds as upper/lower bounds on
 MemRefRegion constraints, to guard against potential over-approximation from
 projection.

--

PiperOrigin-RevId: 247431201
---
 mlir/include/mlir/Analysis/Utils.h     |  5 ++++-
 mlir/include/mlir/IR/StandardTypes.h   |  3 +++
 mlir/lib/Analysis/Utils.cpp            | 20 ++++++++++++++++++--
 mlir/lib/IR/StandardTypes.cpp          |  3 +++
 mlir/test/Transforms/dma-generate.mlir | 28 ++++++++++++++++++++++++++++
 mlir/test/Transforms/loop-tiling.mlir  | 12 ++++++------
 6 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/mlir/include/mlir/Analysis/Utils.h b/mlir/include/mlir/Analysis/Utils.h
index 99e0724..34eb627 100644
--- a/mlir/include/mlir/Analysis/Utils.h
+++ b/mlir/include/mlir/Analysis/Utils.h
@@ -141,6 +141,8 @@ struct MemRefRegion {
   /// *) Inequality constraints for the slice bounds in 'sliceState', which
   ///    represent the bounds on the loop IVs in this constraint system w.r.t
   ///    to slice operands (which correspond to symbols).
+  /// If 'addMemRefDimBounds' is true, constant upper/lower bounds
+  /// [0, memref.getDimSize(i)) are added for each MemRef dimension 'i'.
   ///
   /// For example, the memref region for this operation at loopDepth = 1 will
   /// be:
@@ -155,7 +157,8 @@ struct MemRefRegion {
   ///  The last field is a 2-d FlatAffineConstraints symbolic in %i.
   ///
   LogicalResult compute(Operation *op, unsigned loopDepth,
-                        ComputationSliceState *sliceState = nullptr);
+                        ComputationSliceState *sliceState = nullptr,
+                        bool addMemRefDimBounds = true);
 
   FlatAffineConstraints *getConstraints() { return &cst; }
   const FlatAffineConstraints *getConstraints() const { return &cst; }
diff --git a/mlir/include/mlir/IR/StandardTypes.h b/mlir/include/mlir/IR/StandardTypes.h
index 55be6ed..9ff3978 100644
--- a/mlir/include/mlir/IR/StandardTypes.h
+++ b/mlir/include/mlir/IR/StandardTypes.h
@@ -415,6 +415,9 @@ public:
   static bool kindof(unsigned kind) { return kind == StandardTypes::MemRef; }
 
+  /// Integer value indicating that the size in a dimension is dynamic.
+  static constexpr int64_t kDynamicDimSize = -1;
+
 private:
   /// Get or create a new MemRefType defined by the arguments. If the resulting
   /// type would be ill-formed, return nullptr. If the location is provided,
diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp
index aba14bd..1eaab67 100644
--- a/mlir/lib/Analysis/Utils.cpp
+++ b/mlir/lib/Analysis/Utils.cpp
@@ -170,7 +170,8 @@ LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
 // TODO(bondhugula): extend this to any other memref dereferencing ops
 // (dma_start, dma_wait).
 LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
-                                    ComputationSliceState *sliceState) {
+                                    ComputationSliceState *sliceState,
+                                    bool addMemRefDimBounds) {
   assert((op->isa<LoadOp>() || op->isa<StoreOp>()) && "load/store op expected");
 
   MemRefAccess access(op);
@@ -298,6 +299,20 @@ LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
 
   assert(cst.getNumDimIds() == rank && "unexpected MemRefRegion format");
 
+  // Add upper/lower bounds for each memref dimension with static size
+  // to guard against potential over-approximation from projection.
+  // TODO(andydavis) Support dynamic memref dimensions.
+  if (addMemRefDimBounds) {
+    auto memRefType = memref->getType().cast<MemRefType>();
+    for (unsigned r = 0; r < rank; r++) {
+      cst.addConstantLowerBound(r, 0);
+      int64_t dimSize = memRefType.getDimSize(r);
+      if (dimSize == MemRefType::kDynamicDimSize)
+        continue;
+      cst.addConstantUpperBound(r, dimSize - 1);
+    }
+  }
+
   LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
   LLVM_DEBUG(cst.dump());
   return success();
@@ -372,7 +387,8 @@ LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
 
   Operation *opInst = loadOrStoreOp.getOperation();
   MemRefRegion region(opInst->getLoc());
-  if (failed(region.compute(opInst, /*loopDepth=*/0)))
+  if (failed(region.compute(opInst, /*loopDepth=*/0, /*sliceState=*/nullptr,
+                            /*addMemRefDimBounds=*/false)))
     return success();
 
   LLVM_DEBUG(llvm::dbgs() << "Memory region");
diff --git a/mlir/lib/IR/StandardTypes.cpp b/mlir/lib/IR/StandardTypes.cpp
index 5af031e..37071e1 100644
--- a/mlir/lib/IR/StandardTypes.cpp
+++ b/mlir/lib/IR/StandardTypes.cpp
@@ -306,6 +306,9 @@ LogicalResult UnrankedTensorType::verifyConstructionInvariants(
 // MemRefType
 //===----------------------------------------------------------------------===//
 
+// A static constexpr member needs a definition until C++17 (inline variables).
+constexpr int64_t MemRefType::kDynamicDimSize;
+
 /// Get or create a new MemRefType defined by the arguments. If the resulting
 /// type would be ill-formed, return nullptr. If the location is provided,
 /// emit detailed error messages. To emit errors when the location is unknown,
diff --git a/mlir/test/Transforms/dma-generate.mlir b/mlir/test/Transforms/dma-generate.mlir
index 816330b..0fed053 100644
--- a/mlir/test/Transforms/dma-generate.mlir
+++ b/mlir/test/Transforms/dma-generate.mlir
@@ -571,6 +571,34 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,
 // CHECK: dma_wait %6[%c0], %c2_0 : memref<1xi32>
 // CHECK: affine.for %i1 =
+// -----
+
+#map3 = (d0) -> (d0)
+#map12 = (d0) -> (d0 + 3)
+#map14 = (d0, d1) -> ((d0 + d1 * 72) floordiv 2304 + ((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3)
+#map15 = (d0, d1) -> ((d0 + d1 * 72) mod 2304 - (((d0 + d1 * 72) mod 2304) floordiv 1152) * 1151 - ((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) * 9 - (((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3) * 3)
+#map16 = (d0, d1) -> (((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) floordiv 8)
+// Test for test case in b/128303048 #4.
+func @test_memref_bounds(%arg0: memref<4x4x16x1xvector<8x128xf32>>, %arg1: memref<144x9xvector<8x128xf32>>, %arg2: memref<2xvector<8x128xf32>>) -> (memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>) {
+  %c0 = constant 0 : index
+  affine.for %i8 = 0 to 9 step 3 {
+    affine.for %i9 = #map3(%i8) to #map12(%i8) {
+      affine.for %i10 = 0 to 64 {
+        %10 = affine.apply #map14(%i9, %i10)
+        %11 = affine.apply #map15(%i9, %i10)
+        %12 = affine.apply #map16(%i9, %i10)
+        %13 = load %arg0[%10, %11, %12, %c0] : memref<4x4x16x1xvector<8x128xf32>>
+      }
+    }
+  }
+  return %arg1, %arg2 : memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>
+}
+
+// CHECK: %0 = alloc() : memref<4x4x16x1xvector<8x128xf32>, 2>
+// CHECK-NEXT: %1 = alloc() : memref<1xi32>
+// CHECK-NEXT: dma_start %arg0[%c0, %c0, %c0, %c0], %0[%c0, %c0, %c0, %c0], %c256, %1[%c0] : memref<4x4x16x1xvector<8x128xf32>>, memref<4x4x16x1xvector<8x128xf32>, 2>, memref<1xi32>
+// CHECK-NEXT: dma_wait %1[%c0], %c256 : memref<1xi32>
+
 // -----
 
 // Since the fast memory size is 4 KB, DMA generation will happen right under
diff --git a/mlir/test/Transforms/loop-tiling.mlir b/mlir/test/Transforms/loop-tiling.mlir
index 4686ff5..7553561 100644
--- a/mlir/test/Transforms/loop-tiling.mlir
+++ b/mlir/test/Transforms/loop-tiling.mlir
@@ -83,20 +83,20 @@ func @loop_max_min_bound(%A : memref, %L : index, %U : index) {
 // possible here, they are adjusted to 4 x 4 x 5.
 
 // MODEL-LABEL: func @simple_matmul
-func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
+func @simple_matmul(%arg0: memref<256x256xvector<64xf32>>, %arg1: memref<256x256xvector<64xf32>>, %arg2: memref<256x256xvector<64xf32>>) -> memref<256x256xvector<64xf32>> {
   affine.for %i = 0 to 256 {
     affine.for %j = 0 to 256 {
       affine.for %k = 0 to 250 {
-        %l = load %arg0[%i, %k] : memref<8x8xvector<64xf32>>
-        %r = load %arg1[%k, %j] : memref<8x8xvector<64xf32>>
-        %o = load %arg2[%i, %j] : memref<8x8xvector<64xf32>>
+        %l = load %arg0[%i, %k] : memref<256x256xvector<64xf32>>
+        %r = load %arg1[%k, %j] : memref<256x256xvector<64xf32>>
+        %o = load %arg2[%i, %j] : memref<256x256xvector<64xf32>>
         %m = mulf %l, %r : vector<64xf32>
         %a = addf %o, %m : vector<64xf32>
-        store %a, %arg2[%i, %j] : memref<8x8xvector<64xf32>>
+        store %a, %arg2[%i, %j] : memref<256x256xvector<64xf32>>
       }
     }
  }
-  return %arg2 : memref<8x8xvector<64xf32>>
+  return %arg2 : memref<256x256xvector<64xf32>>
 }
 // MODEL: affine.for %i0 = 0 to 256 step 4 {
 // MODEL-NEXT: affine.for %i1 = 0 to 256 step 4 {
-- 
2.7.4
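
To illustrate the intent of the change above, here is a minimal, self-contained C++ sketch of the guard that the patch adds to MemRefRegion::compute(). It is not the MLIR API: the Region struct, the free-standing addMemRefDimBounds() function, and the kDynamicDimSize constant below are stand-ins chosen for this example. The point it demonstrates is that projecting out loop IVs can over-approximate a per-dimension bounding box, and clamping each statically sized dimension to [0, dimSize - 1] restores a footprint that fits inside the memref.

// Standalone sketch (illustrative only, not MLIR code).
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for MemRefType::kDynamicDimSize from the patch.
constexpr int64_t kDynamicDimSize = -1;

// Toy bounding-box region: one inclusive interval [lb, ub] per dimension.
struct Region {
  std::vector<int64_t> lb, ub;
};

// Mirrors the loop added to MemRefRegion::compute(): lower-bound every
// dimension at 0 and, for statically sized dimensions, upper-bound it at
// dimSize - 1; dynamically sized dimensions are skipped (the TODO above).
void addMemRefDimBounds(Region &region, const std::vector<int64_t> &dimSizes) {
  for (size_t r = 0; r < dimSizes.size(); ++r) {
    region.lb[r] = std::max<int64_t>(region.lb[r], 0);
    int64_t dimSize = dimSizes[r];
    if (dimSize == kDynamicDimSize)
      continue;
    region.ub[r] = std::min(region.ub[r], dimSize - 1);
  }
}

int main() {
  // Hypothetical footprint for a 4x4x16x1 memref where projection has
  // over-approximated dimension 1 as [0, 7], past the memref's extent of 4.
  Region region{{0, 0, 0, 0}, {3, 7, 15, 0}};
  std::vector<int64_t> dimSizes = {4, 4, 16, 1};
  addMemRefDimBounds(region, dimSizes);
  for (size_t r = 0; r < dimSizes.size(); ++r)
    std::cout << "dim " << r << ": [" << region.lb[r] << ", " << region.ub[r]
              << "]\n"; // dimension 1 is clamped to [0, 3].
  return 0;
}

With the clamped box, the region for the new test_memref_bounds test covers at most 4 x 4 x 16 x 1 = 256 elements, which matches the %c256 DMA size expected by the new CHECK lines in dma-generate.mlir.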