/// *) Inequality constraints for the slice bounds in 'sliceState', which
/// represent the bounds on the loop IVs in this constraint system w.r.t.
/// slice operands (which correspond to symbols).
+ /// If 'addMemRefDimBounds' is true, constant upper/lower bounds
+ /// [0, memref.getDimSize(i)) are added for each MemRef dimension 'i'.
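+ /// For example, given 'memref<256x256xf32>', the constraints
+ /// '0 <= d0 <= 255' and '0 <= d1 <= 255' are added for dimensions 0 and 1.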
///
/// For example, the memref region for this operation at loopDepth = 1 will
/// be:
/// The last field is a 2-d FlatAffineConstraints symbolic in %i.
///
LogicalResult compute(Operation *op, unsigned loopDepth,
- ComputationSliceState *sliceState = nullptr);
+ ComputationSliceState *sliceState = nullptr,
+ bool addMemRefDimBounds = true);
FlatAffineConstraints *getConstraints() { return &cst; }
const FlatAffineConstraints *getConstraints() const { return &cst; }
static bool kindof(unsigned kind) { return kind == StandardTypes::MemRef; }
+ /// Integer value indicating that the size in a dimension is dynamic.
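+ /// For example, getDimSize(0) on 'memref<?x4xf32>' returns kDynamicDimSize.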
+ static constexpr int64_t kDynamicDimSize = -1;
+
private:
/// Get or create a new MemRefType defined by the arguments. If the resulting
/// type would be ill-formed, return nullptr. If the location is provided,
// TODO(bondhugula): extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
- ComputationSliceState *sliceState) {
+ ComputationSliceState *sliceState,
+ bool addMemRefDimBounds) {
assert((op->isa<LoadOp>() || op->isa<StoreOp>()) && "load/store op expected");
MemRefAccess access(op);
assert(cst.getNumDimIds() == rank && "unexpected MemRefRegion format");
+ // Add upper/lower bounds for each memref dimension with static size
+ // to guard against potential over-approximation from projection.
+ // TODO(andydavis) Support dynamic memref dimensions.
+ if (addMemRefDimBounds) {
+ auto memRefType = memref->getType().cast<MemRefType>();
+ for (unsigned r = 0; r < rank; r++) {
+ cst.addConstantLowerBound(r, 0);
+ int64_t dimSize = memRefType.getDimSize(r);
+ if (dimSize == MemRefType::kDynamicDimSize)
+ continue;
+ cst.addConstantUpperBound(r, dimSize - 1);
+ }
+ }
+
LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
LLVM_DEBUG(cst.dump());
return success();
Operation *opInst = loadOrStoreOp.getOperation();
MemRefRegion region(opInst->getLoc());
- if (failed(region.compute(opInst, /*loopDepth=*/0)))
+ if (failed(region.compute(opInst, /*loopDepth=*/0, /*sliceState=*/nullptr,
+ /*addMemRefDimBounds=*/false)))
return success();
LLVM_DEBUG(llvm::dbgs() << "Memory region");
// MemRefType
//===----------------------------------------------------------------------===//
+// A static constexpr data member must have an out-of-class definition until
+// C++17, where it becomes an implicitly inline variable.
+constexpr int64_t MemRefType::kDynamicDimSize;
+
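A minimal standalone sketch (hypothetical 'S', 'kSentinel', and 'odrUse', not
part of this patch) of the rule above: before C++17, odr-using an in-class
'static constexpr' member, e.g. binding it to a reference, requires an
out-of-class definition like the one added here.

  #include <cstdint>

  struct S {
    static constexpr std::int64_t kSentinel = -1; // in-class declaration
  };

  // Required pre-C++17 whenever kSentinel is odr-used; in C++17 the member
  // is implicitly inline and this definition is redundant (but still valid).
  constexpr std::int64_t S::kSentinel;

  // Binding the member to a const reference odr-uses it.
  static std::int64_t odrUse(const std::int64_t &v) { return v; }

  int main() { return odrUse(S::kSentinel) == -1 ? 0 : 1; }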
/// Get or create a new MemRefType defined by the arguments. If the resulting
/// type would be ill-formed, return nullptr. If the location is provided,
/// emit detailed error messages. To emit errors when the location is unknown,
// CHECK: dma_wait %6[%c0], %c2_0 : memref<1xi32>
// CHECK: affine.for %i1 =
+// -----
+
+#map3 = (d0) -> (d0)
+#map12 = (d0) -> (d0 + 3)
+#map14 = (d0, d1) -> ((d0 + d1 * 72) floordiv 2304 + ((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3)
+#map15 = (d0, d1) -> ((d0 + d1 * 72) mod 2304 - (((d0 + d1 * 72) mod 2304) floordiv 1152) * 1151 - ((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) * 9 - (((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3) * 3)
+#map16 = (d0, d1) -> (((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) floordiv 8)
+// Test for test case in b/128303048 #4.
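+// With %i9 in [0, 8] and %i10 in [0, 63], x = %i9 + %i10 * 72 ranges over
+// [0, 4544], so #map14 yields at most 4544 floordiv 2304 + 8 floordiv 3
+// = 1 + 2 = 3, staying within the memref's first dimension of size 4.
+// Without the added memref dimension bounds, projecting out the mod/floordiv
+// terms could over-approximate this range.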
+func @test_memref_bounds(%arg0: memref<4x4x16x1xvector<8x128xf32>>, %arg1: memref<144x9xvector<8x128xf32>>, %arg2: memref<2xvector<8x128xf32>>) -> (memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>) {
+ %c0 = constant 0 : index
+ affine.for %i8 = 0 to 9 step 3 {
+ affine.for %i9 = #map3(%i8) to #map12(%i8) {
+ affine.for %i10 = 0 to 64 {
+ %10 = affine.apply #map14(%i9, %i10)
+ %11 = affine.apply #map15(%i9, %i10)
+ %12 = affine.apply #map16(%i9, %i10)
+ %13 = load %arg0[%10, %11, %12, %c0] : memref<4x4x16x1xvector<8x128xf32>>
+ }
+ }
+ }
+ return %arg1, %arg2 : memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>
+}
+
+// CHECK: %0 = alloc() : memref<4x4x16x1xvector<8x128xf32>, 2>
+// CHECK-NEXT: %1 = alloc() : memref<1xi32>
+// CHECK-NEXT: dma_start %arg0[%c0, %c0, %c0, %c0], %0[%c0, %c0, %c0, %c0], %c256, %1[%c0] : memref<4x4x16x1xvector<8x128xf32>>, memref<4x4x16x1xvector<8x128xf32>, 2>, memref<1xi32>
+// CHECK-NEXT: dma_wait %1[%c0], %c256 : memref<1xi32>
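+// The region is thus clamped to the memref shape, and the DMA transfers the
+// full buffer: 4 * 4 * 16 * 1 = 256 elements (%c256 above).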
+
// -----
// Since the fast memory size is 4 KB, DMA generation will happen right under
// possible here, they are adjusted to 4 x 4 x 5.
// MODEL-LABEL: func @simple_matmul
-func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
+func @simple_matmul(%arg0: memref<256x256xvector<64xf32>>, %arg1: memref<256x256xvector<64xf32>>, %arg2: memref<256x256xvector<64xf32>>) -> memref<256x256xvector<64xf32>> {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
affine.for %k = 0 to 250 {
- %l = load %arg0[%i, %k] : memref<8x8xvector<64xf32>>
- %r = load %arg1[%k, %j] : memref<8x8xvector<64xf32>>
- %o = load %arg2[%i, %j] : memref<8x8xvector<64xf32>>
+ %l = load %arg0[%i, %k] : memref<256x256xvector<64xf32>>
+ %r = load %arg1[%k, %j] : memref<256x256xvector<64xf32>>
+ %o = load %arg2[%i, %j] : memref<256x256xvector<64xf32>>
%m = mulf %l, %r : vector<64xf32>
%a = addf %o, %m : vector<64xf32>
- store %a, %arg2[%i, %j] : memref<8x8xvector<64xf32>>
+ store %a, %arg2[%i, %j] : memref<256x256xvector<64xf32>>
}
}
}
- return %arg2 : memref<8x8xvector<64xf32>>
+ return %arg2 : memref<256x256xvector<64xf32>>
}
// MODEL: affine.for %i0 = 0 to 256 step 4 {
// MODEL-NEXT: affine.for %i1 = 0 to 256 step 4 {