From a8aeb651cdae4e687500575108e12c89e540f59c Mon Sep 17 00:00:00 2001 From: Kirsten Lee Date: Mon, 3 Oct 2022 18:40:05 +0000 Subject: [PATCH] [mlir][memref] Extend multi-buffering transform Extend multi-buffering to simplify the affine map created if any of its operands are constants. This avoids downstream problems where more complex affine.apply operations cannot be expanded. Transfer attributes from the old allocation to the new allocation. Reviewed By: ThomasRaoux Differential Revision: https://reviews.llvm.org/D134894 --- mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp | 31 +++++++++++++++------- mlir/test/Dialect/MemRef/multibuffer.mlir | 16 +++++------ mlir/test/Dialect/MemRef/transform-ops.mlir | 4 +-- 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp b/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp index 75e3746..d28a603 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp @@ -104,11 +104,11 @@ FailureOr mlir::memref::multiBuffer(memref::AllocOp allocOp, llvm::Optional singleStep = candidateLoop.getSingleStep(); if (!inductionVar || !lowerBound || !singleStep) return failure(); + + if (!dom.dominates(allocOp.getOperation(), candidateLoop)) + return failure(); + OpBuilder builder(candidateLoop); - Value stepValue = - getOrCreateValue(*singleStep, builder, candidateLoop->getLoc()); - Value lowerBoundValue = - getOrCreateValue(*lowerBound, builder, candidateLoop->getLoc()); SmallVector newShape(1, multiplier); ArrayRef oldShape = allocOp.getType().getShape(); newShape.append(oldShape.begin(), oldShape.end()); @@ -117,15 +117,28 @@ FailureOr mlir::memref::multiBuffer(memref::AllocOp allocOp, allocOp.getType().getMemorySpace()); builder.setInsertionPoint(allocOp); Location loc = allocOp->getLoc(); - auto newAlloc = builder.create(loc, newMemref); + auto newAlloc = builder.create(loc, newMemref, ValueRange{}, + allocOp->getAttrs()); builder.setInsertionPoint(&candidateLoop.getLoopBody().front(), candidateLoop.getLoopBody().front().begin()); + + SmallVector operands = {*inductionVar}; AffineExpr induc = getAffineDimExpr(0, allocOp.getContext()); - AffineExpr init = getAffineDimExpr(1, allocOp.getContext()); - AffineExpr step = getAffineDimExpr(2, allocOp.getContext()); + unsigned dimCount = 1; + auto getAffineExpr = [&](OpFoldResult e) -> AffineExpr { + if (Optional constValue = getConstantIntValue(e)) { + return getAffineConstantExpr(*constValue, allocOp.getContext()); + } else { + auto value = getOrCreateValue(e, builder, candidateLoop->getLoc()); + operands.push_back(value); + return getAffineDimExpr(dimCount++, allocOp.getContext()); + } + }; + auto init = getAffineExpr(*lowerBound); + auto step = getAffineExpr(*singleStep); + AffineExpr expr = ((induc - init).floorDiv(step)) % multiplier; - auto map = AffineMap::get(3, 0, expr); - std::array operands = {*inductionVar, lowerBoundValue, stepValue}; + auto map = AffineMap::get(dimCount, 0, expr); Value bufferIndex = builder.create(loc, map, operands); SmallVector offsets, sizes, strides; offsets.push_back(bufferIndex); diff --git a/mlir/test/Dialect/MemRef/multibuffer.mlir b/mlir/test/Dialect/MemRef/multibuffer.mlir index b70b51e..4ab7d99 100644 --- a/mlir/test/Dialect/MemRef/multibuffer.mlir +++ b/mlir/test/Dialect/MemRef/multibuffer.mlir @@ -1,19 +1,19 @@ // RUN: mlir-opt %s -allow-unregistered-dialect -test-multi-buffering=multiplier=5 -cse -split-input-file | FileCheck %s -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (((d0 - d1) floordiv d2) mod 5)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0) -> (((d0 - 1) floordiv 3) mod 5)> // CHECK-LABEL: func @multi_buffer func.func @multi_buffer(%a: memref<1024x1024xf32>) { -// CHECK-DAG: %[[A:.*]] = memref.alloc() : memref<5x4x128xf32> +// CHECK-DAG: %[[A:.*]] = memref.alloc() {someAttribute} : memref<5x4x128xf32> // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index - %0 = memref.alloc() : memref<4x128xf32> + %0 = memref.alloc() {someAttribute} : memref<4x128xf32> %c1024 = arith.constant 1024 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index // CHECK: scf.for %[[IV:.*]] = %[[C1]] scf.for %arg2 = %c1 to %c1024 step %c3 { -// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]], %[[C1]], %[[C3]]) +// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]]) // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0, 0] [1, 4, 128] [1, 1, 1] : memref<5x4x128xf32> to memref<4x128xf32, strided<[128, 1], offset: ?>> %1 = memref.subview %a[%arg2, 0] [4, 128] [1, 1] : memref<1024x1024xf32> to memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> @@ -32,15 +32,13 @@ func.func @multi_buffer(%a: memref<1024x1024xf32>) { // CHECK-LABEL: func @multi_buffer_affine func.func @multi_buffer_affine(%a: memref<1024x1024xf32>) { // CHECK-DAG: %[[A:.*]] = memref.alloc() : memref<5x4x128xf32> -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index %0 = memref.alloc() : memref<4x128xf32> %c1024 = arith.constant 1024 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index // CHECK: affine.for %[[IV:.*]] = 1 affine.for %arg2 = 1 to 1024 step 3 { -// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]], %[[C1]], %[[C3]]) +// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]]) // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0, 0] [1, 4, 128] [1, 1, 1] : memref<5x4x128xf32> to memref<4x128xf32, strided<[128, 1], offset: ?>> %1 = memref.subview %a[%arg2, 0] [4, 128] [1, 1] : memref<1024x1024xf32> to memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> @@ -56,7 +54,7 @@ func.func @multi_buffer_affine(%a: memref<1024x1024xf32>) { // ----- -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (((d0 - d1) floordiv d2) mod 5)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0) -> (((d0 - 1) floordiv 3) mod 5)> // CHECK-LABEL: func @multi_buffer_subview_use func.func @multi_buffer_subview_use(%a: memref<1024x1024xf32>) { @@ -69,7 +67,7 @@ func.func @multi_buffer_subview_use(%a: memref<1024x1024xf32>) { %c3 = arith.constant 3 : index // CHECK: scf.for %[[IV:.*]] = %[[C1]] scf.for %arg2 = %c1 to %c1024 step %c3 { -// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]], %[[C1]], %[[C3]]) +// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]]) // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0, 0] [1, 4, 128] [1, 1, 1] : memref<5x4x128xf32> to memref<4x128xf32, strided<[128, 1], offset: ?>> %1 = memref.subview %a[%arg2, 0] [4, 128] [1, 1] : memref<1024x1024xf32> to memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> diff --git a/mlir/test/Dialect/MemRef/transform-ops.mlir b/mlir/test/Dialect/MemRef/transform-ops.mlir index 5b6f70c..9216ebc 100644 --- a/mlir/test/Dialect/MemRef/transform-ops.mlir +++ b/mlir/test/Dialect/MemRef/transform-ops.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s -test-transform-dialect-interpreter -verify-diagnostics -allow-unregistered-dialect | FileCheck %s -// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (((d0 - d1) floordiv d2) mod 2)> +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)> // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> // CHECK-LABEL: func @multi_buffer @@ -17,7 +17,7 @@ func.func @multi_buffer(%in: memref<16xf32>) { // CHECK: scf.for %[[IV:.*]] = %[[C0]] scf.for %i0 = %c0 to %c16 step %c4 { - // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]], %[[C0]], %[[C4]]) + // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]]) // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>> %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> // CHECK: memref.copy %{{.*}}, %[[SV]] : memref<4xf32, #[[$MAP1]]> to memref<4xf32, strided<[1], offset: ?>> -- 2.7.4