/// *) Inequality constraints for the slice bounds in 'sliceState', which
/// represent the bounds on the loop IVs in this constraint system w.r.t.
/// slice operands (which correspond to symbols).
+ /// If 'addMemRefDimBounds' is true, constant upper/lower bounds
+ /// [0, memref.getDimSize(i)) are added for each MemRef dimension 'i'.
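+ /// For example, given 'memref<256x256xf32>', the constraints
+ /// '0 <= d0 <= 255' and '0 <= d1 <= 255' are added for dimensions 0 and 1.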
///
/// For example, the memref region for this operation at loopDepth = 1 will
/// be:
/// The last field is a 2-d FlatAffineConstraints symbolic in %i.
///
LogicalResult compute(Operation *op, unsigned loopDepth,
- ComputationSliceState *sliceState = nullptr);
+ ComputationSliceState *sliceState = nullptr,
+ bool addMemRefDimBounds = true);
FlatAffineConstraints *getConstraints() { return &cst; }
const FlatAffineConstraints *getConstraints() const { return &cst; }
static bool kindof(unsigned kind) { return kind == StandardTypes::MemRef; }
+ /// Integer value indicating that the size in a dimension is dynamic.
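+ /// For example, getDimSize(0) on 'memref<?x4xf32>' returns kDynamicDimSize.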
+ static constexpr int64_t kDynamicDimSize = -1;
+
private:
/// Get or create a new MemRefType defined by the arguments. If the resulting
/// type would be ill-formed, return nullptr. If the location is provided,
// TODO(bondhugula): extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
- ComputationSliceState *sliceState) {
+ ComputationSliceState *sliceState,
+ bool addMemRefDimBounds) {
assert((op->isa<LoadOp>() || op->isa<StoreOp>()) && "load/store op expected");
MemRefAccess access(op);
assert(cst.getNumDimIds() == rank && "unexpected MemRefRegion format");
+ // Add upper/lower bounds for each memref dimension with static size
+ // to guard against potential over-approximation from projection.
+ // TODO(andydavis) Support dynamic memref dimensions.
+ if (addMemRefDimBounds) {
+ auto memRefType = memref->getType().cast<MemRefType>();
+ for (unsigned r = 0; r < rank; r++) {
+ cst.addConstantLowerBound(r, 0);
+ int64_t dimSize = memRefType.getDimSize(r);
+ if (dimSize == MemRefType::kDynamicDimSize)
+ continue;
+ cst.addConstantUpperBound(r, dimSize - 1);
+ }
+ }
+
LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
LLVM_DEBUG(cst.dump());
return success();
Operation *opInst = loadOrStoreOp.getOperation();
MemRefRegion region(opInst->getLoc());
- if (failed(region.compute(opInst, /*loopDepth=*/0)))
+ if (failed(region.compute(opInst, /*loopDepth=*/0, /*sliceState=*/nullptr,
+ /*addMemRefDimBounds=*/false)))
return success();
LLVM_DEBUG(llvm::dbgs() << "Memory region");
// MemRefType
//===----------------------------------------------------------------------===//
+// A static constexpr data member must have an out-of-class definition until
+// C++17, where it becomes an implicitly inline variable.
+constexpr int64_t MemRefType::kDynamicDimSize;
+
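A minimal standalone sketch (hypothetical 'S', 'kSentinel', and 'odrUse', not
part of this patch) of the rule above: before C++17, odr-using an in-class
'static constexpr' member, e.g. binding it to a reference, requires an
out-of-class definition like the one added here.

  #include <cstdint>

  struct S {
    static constexpr std::int64_t kSentinel = -1; // in-class declaration
  };

  // Required pre-C++17 whenever kSentinel is odr-used; in C++17 the member
  // is implicitly inline and this definition is redundant (but still valid).
  constexpr std::int64_t S::kSentinel;

  // Binding the member to a const reference odr-uses it.
  static std::int64_t odrUse(const std::int64_t &v) { return v; }

  int main() { return odrUse(S::kSentinel) == -1 ? 0 : 1; }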
/// Get or create a new MemRefType defined by the arguments. If the resulting
/// type would be ill-formed, return nullptr. If the location is provided,
/// emit detailed error messages. To emit errors when the location is unknown,
// CHECK: dma_wait %6[%c0], %c2_0 : memref<1xi32>
// CHECK: affine.for %i1 =
+// -----
+
+#map3 = (d0) -> (d0)
+#map12 = (d0) -> (d0 + 3)
+#map14 = (d0, d1) -> ((d0 + d1 * 72) floordiv 2304 + ((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3)
+#map15 = (d0, d1) -> ((d0 + d1 * 72) mod 2304 - (((d0 + d1 * 72) mod 2304) floordiv 1152) * 1151 - ((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) * 9 - (((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3) * 3)
+#map16 = (d0, d1) -> (((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) floordiv 8)
+// Test for test case in b/128303048 #4.
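+// With %i9 in [0, 8] and %i10 in [0, 63], x = %i9 + %i10 * 72 ranges over
+// [0, 4544], so #map14 yields at most 4544 floordiv 2304 + 8 floordiv 3
+// = 1 + 2 = 3, staying within the memref's first dimension of size 4.
+// Without the added memref dimension bounds, projecting out the mod/floordiv
+// terms could over-approximate this range.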
+func @test_memref_bounds(%arg0: memref<4x4x16x1xvector<8x128xf32>>, %arg1: memref<144x9xvector<8x128xf32>>, %arg2: memref<2xvector<8x128xf32>>) -> (memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>) {
+ %c0 = constant 0 : index
+ affine.for %i8 = 0 to 9 step 3 {
+ affine.for %i9 = #map3(%i8) to #map12(%i8) {
+ affine.for %i10 = 0 to 64 {
+ %10 = affine.apply #map14(%i9, %i10)
+ %11 = affine.apply #map15(%i9, %i10)
+ %12 = affine.apply #map16(%i9, %i10)
+ %13 = load %arg0[%10, %11, %12, %c0] : memref<4x4x16x1xvector<8x128xf32>>
+ }
+ }
+ }
+ return %arg1, %arg2 : memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>
+}
+
+// CHECK: %0 = alloc() : memref<4x4x16x1xvector<8x128xf32>, 2>
+// CHECK-NEXT: %1 = alloc() : memref<1xi32>
+// CHECK-NEXT: dma_start %arg0[%c0, %c0, %c0, %c0], %0[%c0, %c0, %c0, %c0], %c256, %1[%c0] : memref<4x4x16x1xvector<8x128xf32>>, memref<4x4x16x1xvector<8x128xf32>, 2>, memref<1xi32>
+// CHECK-NEXT: dma_wait %1[%c0], %c256 : memref<1xi32>
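+// The region is thus clamped to the memref shape, and the DMA transfers the
+// full buffer: 4 * 4 * 16 * 1 = 256 elements (%c256 above).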
+
// -----
// Since the fast memory size is 4 KB, DMA generation will happen right under
// possible here, they are adjusted to 4 x 4 x 5.
// MODEL-LABEL: func @simple_matmul
-func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
+func @simple_matmul(%arg0: memref<256x256xvector<64xf32>>, %arg1: memref<256x256xvector<64xf32>>, %arg2: memref<256x256xvector<64xf32>>) -> memref<256x256xvector<64xf32>> {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
affine.for %k = 0 to 250 {
- %l = load %arg0[%i, %k] : memref<8x8xvector<64xf32>>
- %r = load %arg1[%k, %j] : memref<8x8xvector<64xf32>>
- %o = load %arg2[%i, %j] : memref<8x8xvector<64xf32>>
+ %l = load %arg0[%i, %k] : memref<256x256xvector<64xf32>>
+ %r = load %arg1[%k, %j] : memref<256x256xvector<64xf32>>
+ %o = load %arg2[%i, %j] : memref<256x256xvector<64xf32>>
%m = mulf %l, %r : vector<64xf32>
%a = addf %o, %m : vector<64xf32>
- store %a, %arg2[%i, %j] : memref<8x8xvector<64xf32>>
+ store %a, %arg2[%i, %j] : memref<256x256xvector<64xf32>>
}
}
}
- return %arg2 : memref<8x8xvector<64xf32>>
+ return %arg2 : memref<256x256xvector<64xf32>>
}
// MODEL: affine.for %i0 = 0 to 256 step 4 {
// MODEL-NEXT: affine.for %i1 = 0 to 256 step 4 {