From 0412bf6f09892557218b035039e0ab1e567cc1eb Mon Sep 17 00:00:00 2001
From: Andy Davis
Date: Thu, 9 May 2019 08:36:02 -0700
Subject: [PATCH] Add memref dimension bounds as upper/lower bounds on
 MemRefRegion constraints, to guard against potential over-approximation from
 projection.

--

PiperOrigin-RevId: 247431201
---
 mlir/include/mlir/Analysis/Utils.h     |  5 ++++-
 mlir/include/mlir/IR/StandardTypes.h   |  3 +++
 mlir/lib/Analysis/Utils.cpp            | 20 ++++++++++++++++++--
 mlir/lib/IR/StandardTypes.cpp          |  3 +++
 mlir/test/Transforms/dma-generate.mlir | 28 ++++++++++++++++++++++++++++
 mlir/test/Transforms/loop-tiling.mlir  | 12 ++++++------
 6 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/mlir/include/mlir/Analysis/Utils.h b/mlir/include/mlir/Analysis/Utils.h
index 99e0724..34eb627 100644
--- a/mlir/include/mlir/Analysis/Utils.h
+++ b/mlir/include/mlir/Analysis/Utils.h
@@ -141,6 +141,8 @@ struct MemRefRegion {
   /// *) Inequality constraints for the slice bounds in 'sliceState', which
   ///    represent the bounds on the loop IVs in this constraint system w.r.t
   ///    to slice operands (which correspond to symbols).
+  /// If 'addMemRefDimBounds' is true, constant upper/lower bounds
+  /// [0, memref.getDimSize(i)) are added for each MemRef dimension 'i'.
   ///
   /// For example, the memref region for this operation at loopDepth = 1 will
   /// be:
@@ -155,7 +157,8 @@ struct MemRefRegion {
   ///  The last field is a 2-d FlatAffineConstraints symbolic in %i.
   ///
   LogicalResult compute(Operation *op, unsigned loopDepth,
-                        ComputationSliceState *sliceState = nullptr);
+                        ComputationSliceState *sliceState = nullptr,
+                        bool addMemRefDimBounds = true);
 
   FlatAffineConstraints *getConstraints() { return &cst; }
   const FlatAffineConstraints *getConstraints() const { return &cst; }
diff --git a/mlir/include/mlir/IR/StandardTypes.h b/mlir/include/mlir/IR/StandardTypes.h
index 55be6ed..9ff3978 100644
--- a/mlir/include/mlir/IR/StandardTypes.h
+++ b/mlir/include/mlir/IR/StandardTypes.h
@@ -415,6 +415,9 @@ public:
   static bool kindof(unsigned kind) { return kind == StandardTypes::MemRef; }
 
+  /// Integer value indicating that the size in a dimension is dynamic.
+  static constexpr int64_t kDynamicDimSize = -1;
+
 private:
   /// Get or create a new MemRefType defined by the arguments. If the resulting
   /// type would be ill-formed, return nullptr. If the location is provided,
diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp
index aba14bd..1eaab67 100644
--- a/mlir/lib/Analysis/Utils.cpp
+++ b/mlir/lib/Analysis/Utils.cpp
@@ -170,7 +170,8 @@ LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
 // TODO(bondhugula): extend this to any other memref dereferencing ops
 // (dma_start, dma_wait).
 LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
-                                    ComputationSliceState *sliceState) {
+                                    ComputationSliceState *sliceState,
+                                    bool addMemRefDimBounds) {
   assert((op->isa<LoadOp>() || op->isa<StoreOp>()) && "load/store op expected");
 
   MemRefAccess access(op);
@@ -298,6 +299,20 @@ LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
 
   assert(cst.getNumDimIds() == rank && "unexpected MemRefRegion format");
 
+  // Add upper/lower bounds for each memref dimension with static size
+  // to guard against potential over-approximation from projection.
+  // TODO(andydavis) Support dynamic memref dimensions.
+  if (addMemRefDimBounds) {
+    auto memRefType = memref->getType().cast<MemRefType>();
+    for (unsigned r = 0; r < rank; r++) {
+      cst.addConstantLowerBound(r, 0);
+      int64_t dimSize = memRefType.getDimSize(r);
+      if (dimSize == MemRefType::kDynamicDimSize)
+        continue;
+      cst.addConstantUpperBound(r, dimSize - 1);
+    }
+  }
+
   LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
   LLVM_DEBUG(cst.dump());
   return success();
@@ -372,7 +387,8 @@ LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
 
   Operation *opInst = loadOrStoreOp.getOperation();
   MemRefRegion region(opInst->getLoc());
-  if (failed(region.compute(opInst, /*loopDepth=*/0)))
+  if (failed(region.compute(opInst, /*loopDepth=*/0, /*sliceState=*/nullptr,
+                            /*addMemRefDimBounds=*/false)))
     return success();
 
   LLVM_DEBUG(llvm::dbgs() << "Memory region");
diff --git a/mlir/lib/IR/StandardTypes.cpp b/mlir/lib/IR/StandardTypes.cpp
index 5af031e..37071e1 100644
--- a/mlir/lib/IR/StandardTypes.cpp
+++ b/mlir/lib/IR/StandardTypes.cpp
@@ -306,6 +306,9 @@ LogicalResult UnrankedTensorType::verifyConstructionInvariants(
 // MemRefType
 //===----------------------------------------------------------------------===//
 
+// A static constexpr member needs a definition until C++17 (inline variables).
+constexpr int64_t MemRefType::kDynamicDimSize;
+
 /// Get or create a new MemRefType defined by the arguments. If the resulting
 /// type would be ill-formed, return nullptr. If the location is provided,
 /// emit detailed error messages. To emit errors when the location is unknown,
diff --git a/mlir/test/Transforms/dma-generate.mlir b/mlir/test/Transforms/dma-generate.mlir
index 816330b..0fed053 100644
--- a/mlir/test/Transforms/dma-generate.mlir
+++ b/mlir/test/Transforms/dma-generate.mlir
@@ -571,6 +571,34 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,
 // CHECK: dma_wait %6[%c0], %c2_0 : memref<1xi32>
 // CHECK: affine.for %i1 =
+// -----
+
+#map3 = (d0) -> (d0)
+#map12 = (d0) -> (d0 + 3)
+#map14 = (d0, d1) -> ((d0 + d1 * 72) floordiv 2304 + ((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3)
+#map15 = (d0, d1) -> ((d0 + d1 * 72) mod 2304 - (((d0 + d1 * 72) mod 2304) floordiv 1152) * 1151 - ((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) * 9 - (((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3) * 3)
+#map16 = (d0, d1) -> (((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) floordiv 8)
+// Test for test case in b/128303048 #4.
+func @test_memref_bounds(%arg0: memref<4x4x16x1xvector<8x128xf32>>, %arg1: memref<144x9xvector<8x128xf32>>, %arg2: memref<2xvector<8x128xf32>>) -> (memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>) {
+  %c0 = constant 0 : index
+  affine.for %i8 = 0 to 9 step 3 {
+    affine.for %i9 = #map3(%i8) to #map12(%i8) {
+      affine.for %i10 = 0 to 64 {
+        %10 = affine.apply #map14(%i9, %i10)
+        %11 = affine.apply #map15(%i9, %i10)
+        %12 = affine.apply #map16(%i9, %i10)
+        %13 = load %arg0[%10, %11, %12, %c0] : memref<4x4x16x1xvector<8x128xf32>>
+      }
+    }
+  }
+  return %arg1, %arg2 : memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>
+}
+
+// CHECK: %0 = alloc() : memref<4x4x16x1xvector<8x128xf32>, 2>
+// CHECK-NEXT: %1 = alloc() : memref<1xi32>
+// CHECK-NEXT: dma_start %arg0[%c0, %c0, %c0, %c0], %0[%c0, %c0, %c0, %c0], %c256, %1[%c0] : memref<4x4x16x1xvector<8x128xf32>>, memref<4x4x16x1xvector<8x128xf32>, 2>, memref<1xi32>
+// CHECK-NEXT: dma_wait %1[%c0], %c256 : memref<1xi32>
+
 // -----
 
 // Since the fast memory size is 4 KB, DMA generation will happen right under
diff --git a/mlir/test/Transforms/loop-tiling.mlir b/mlir/test/Transforms/loop-tiling.mlir
index 4686ff5..7553561 100644
--- a/mlir/test/Transforms/loop-tiling.mlir
+++ b/mlir/test/Transforms/loop-tiling.mlir
@@ -83,20 +83,20 @@ func @loop_max_min_bound(%A : memref, %L : index, %U : index) {
 // possible here, they are adjusted to 4 x 4 x 5.
 
 // MODEL-LABEL: func @simple_matmul
-func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
+func @simple_matmul(%arg0: memref<256x256xvector<64xf32>>, %arg1: memref<256x256xvector<64xf32>>, %arg2: memref<256x256xvector<64xf32>>) -> memref<256x256xvector<64xf32>> {
   affine.for %i = 0 to 256 {
     affine.for %j = 0 to 256 {
       affine.for %k = 0 to 250 {
-        %l = load %arg0[%i, %k] : memref<8x8xvector<64xf32>>
-        %r = load %arg1[%k, %j] : memref<8x8xvector<64xf32>>
-        %o = load %arg2[%i, %j] : memref<8x8xvector<64xf32>>
+        %l = load %arg0[%i, %k] : memref<256x256xvector<64xf32>>
+        %r = load %arg1[%k, %j] : memref<256x256xvector<64xf32>>
+        %o = load %arg2[%i, %j] : memref<256x256xvector<64xf32>>
         %m = mulf %l, %r : vector<64xf32>
         %a = addf %o, %m : vector<64xf32>
-        store %a, %arg2[%i, %j] : memref<8x8xvector<64xf32>>
+        store %a, %arg2[%i, %j] : memref<256x256xvector<64xf32>>
       }
     }
  }
-  return %arg2 : memref<8x8xvector<64xf32>>
+  return %arg2 : memref<256x256xvector<64xf32>>
 }
 // MODEL: affine.for %i0 = 0 to 256 step 4 {
 // MODEL-NEXT: affine.for %i1 = 0 to 256 step 4 {
-- 
2.7.4
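
To illustrate the intent of the change above, here is a minimal, self-contained C++ sketch of the guard that the patch adds to MemRefRegion::compute(). It is not the MLIR API: the Region struct, the free-standing addMemRefDimBounds() function, and the kDynamicDimSize constant below are stand-ins chosen for this example. The point it demonstrates is that projecting out loop IVs can over-approximate a per-dimension bounding box, and clamping each statically sized dimension to [0, dimSize - 1] restores a footprint that fits inside the memref.

// Standalone sketch (illustrative only, not MLIR code).
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for MemRefType::kDynamicDimSize from the patch.
constexpr int64_t kDynamicDimSize = -1;

// Toy bounding-box region: one inclusive interval [lb, ub] per dimension.
struct Region {
  std::vector<int64_t> lb, ub;
};

// Mirrors the loop added to MemRefRegion::compute(): lower-bound every
// dimension at 0 and, for statically sized dimensions, upper-bound it at
// dimSize - 1; dynamically sized dimensions are skipped (the TODO above).
void addMemRefDimBounds(Region &region, const std::vector<int64_t> &dimSizes) {
  for (size_t r = 0; r < dimSizes.size(); ++r) {
    region.lb[r] = std::max<int64_t>(region.lb[r], 0);
    int64_t dimSize = dimSizes[r];
    if (dimSize == kDynamicDimSize)
      continue;
    region.ub[r] = std::min(region.ub[r], dimSize - 1);
  }
}

int main() {
  // Hypothetical footprint for a 4x4x16x1 memref where projection has
  // over-approximated dimension 1 as [0, 7], past the memref's extent of 4.
  Region region{{0, 0, 0, 0}, {3, 7, 15, 0}};
  std::vector<int64_t> dimSizes = {4, 4, 16, 1};
  addMemRefDimBounds(region, dimSizes);
  for (size_t r = 0; r < dimSizes.size(); ++r)
    std::cout << "dim " << r << ": [" << region.lb[r] << ", " << region.ub[r]
              << "]\n"; // dimension 1 is clamped to [0, 3].
  return 0;
}

With the clamped box, the region for the new test_memref_bounds test covers at most 4 x 4 x 16 x 1 = 256 elements, which matches the %c256 DMA size expected by the new CHECK lines in dma-generate.mlir.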