From ad7c49bef774191304b804e60821c0cdb6a390cb Mon Sep 17 00:00:00 2001
From: Jerry Wu
Date: Mon, 9 May 2022 08:43:40 -0700
Subject: [PATCH] [mlir][linalg] Fix padding size calculation for Conv2d ops.

This patch fixes the padding size calculation for Conv2d ops when the stride
is greater than 1. It contains the changes below:

- Use addBound to add constraints for AffineApplyOp in getUpperBoundForIndex,
  so the result value can be mapped and retrieved later.

- Fix the bound from AffineMinOp by adding it as a closed bound. Originally
  the bound was added as an open upper bound, which results in incorrect
  bounds when the values are multiplied. For example:

  ```
  %0 = affine.min affine_map<()[s0] -> (4, -s0 + 11)>()[%iv0]
  %1 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%0]
  ```

  If we add the affine.min as an open bound, addBound internally transforms it
  into the closed bound "%0 <= 3". The following sliceBounds then derives the
  bound of %1 as "%1 <= 6" and returns the open bound "%1 < 7", while the
  correct bound should be "%1 <= 8".

- In addition to addBound, sliceBounds is changed to support returning closed
  upper bounds, since for the size computation we usually care about the
  closed bounds.

- Change getUpperBoundForIndex to favor constant bounds when required.
  sliceBounds may return a tighter but non-constant bound, which can't be used
  for padding. The constantRequired option makes getUpperBoundForIndex return
  a constant bound when possible.

Reviewed By: hanchung

Differential Revision: https://reviews.llvm.org/D124821
---
 .../Dialect/Affine/Analysis/AffineStructures.h   | 23 +++++++-
 mlir/include/mlir/Dialect/Linalg/Utils/Utils.h   |  6 ++-
 .../Dialect/Affine/Analysis/AffineStructures.cpp | 34 ++++++++----
 mlir/lib/Dialect/Linalg/Utils/Utils.cpp          | 35 ++++++++----
 mlir/test/Dialect/Linalg/pad.mlir                | 63 ++++++++++++++++++++++
 5 files changed, 140 insertions(+), 21 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
index 875e405..427d96f 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
@@ -166,8 +166,25 @@ public:
   /// bound map is expected to have exactly one result. In case of a LB/UB, the
   /// bound map may have more than one result, for each of which an inequality
   /// is added.
+  ///
+  /// The bound can be added as open or closed by specifying isClosedBound. In
+  /// case of a LB/UB, isClosedBound = false means the bound is added internally
+  /// as a closed bound by +1/-1 respectively. In case of an EQ bound, it can
+  /// only be added as a closed bound.
+  ///
   /// Note: The dimensions/symbols of this FlatAffineConstraints must match the
   /// dimensions/symbols of the affine map.
+  LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap,
+                         bool isClosedBound);
+
+  /// Adds a bound for the identifier at the specified position with constraints
+  /// being drawn from the specified bound map. In case of an EQ bound, the
+  /// bound map is expected to have exactly one result. In case of a LB/UB, the
+  /// bound map may have more than one result, for each of which an inequality
+  /// is added.
+  /// Note: The dimensions/symbols of this FlatAffineConstraints must match the
+  /// dimensions/symbols of the affine map. By default the lower bound is closed
+  /// and the upper bound is open.
   LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap);
 
   /// Adds a bound for the identifier at the specified position with constraints
@@ -197,9 +214,13 @@ public:
   /// identifiers as floordiv's and mod's of affine expressions of other
   /// identifiers with respect to (positive) constants. Sets bound map to a
   /// null AffineMap if such a bound can't be found (or yet unimplemented).
+  ///
+  /// By default the returned lower bounds are closed and upper bounds are open.
+  /// This can be changed by getClosedUB.
   void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context,
                       SmallVectorImpl<AffineMap> *lbMaps,
-                      SmallVectorImpl<AffineMap> *ubMaps);
+                      SmallVectorImpl<AffineMap> *ubMaps,
+                      bool getClosedUB = false);
 
   /// Composes an affine map whose dimensions and symbols match one to one with
   /// the dimensions and symbols of this FlatAffineConstraints. The results of
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index bd427ea..3e9d072 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -50,6 +50,9 @@ SmallVector<Value> getDynOperands(Location loc, Value val, OpBuilder &b);
 /// values. The method sets `boundMap` to an affine map that given
 /// `boundOperands` evaluates to an upper bound for the index computation.
 ///
+/// If constantRequired is true, only returns the constant bounds (potentially
+/// over-approximating) and fails when not possible.
+///
 /// Example:
 /// ```
 /// %dim0 = dim %tensor, %c0
@@ -62,7 +65,8 @@ SmallVector<Value> getDynOperands(Location loc, Value val, OpBuilder &b);
 /// - boundMap = affine.map<(d0) -> (d0 + 40)>
 /// - boundOperands = [%dim1]
 void getUpperBoundForIndex(Value value, AffineMap &boundMap,
-                           SmallVectorImpl<Value> &boundOperands);
+                           SmallVectorImpl<Value> &boundOperands,
+                           bool constantRequired = false);
 
 /// Returns a constant upper bound for the result `value` of an index
 /// computation. Calls `getUpperBoundForIndex` and returns a constant upper
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
index e31dcbd..619d9c7 100644
--- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
@@ -965,7 +965,8 @@ FlatAffineValueConstraints::getLowerAndUpperBound(
 /// this process if needed.
 void FlatAffineValueConstraints::getSliceBounds(
     unsigned offset, unsigned num, MLIRContext *context,
-    SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *ubMaps) {
+    SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *ubMaps,
+    bool getClosedUB) {
   assert(num < getNumDimIds() && "invalid range");
 
   // Basic simplification.
@@ -1065,6 +1066,8 @@ void FlatAffineValueConstraints::getSliceBounds(
     // again.
   } while (changed);
 
+  int64_t ubAdjustment = getClosedUB ? 0 : 1;
+
   // Set the lower and upper bound maps for all the identifiers that were
   // computed as affine expressions of the rest as the "detected expr" and
   // "detected expr + 1" respectively; set the undetected ones to null.
@@ -1081,7 +1084,7 @@ void FlatAffineValueConstraints::getSliceBounds(
     if (expr) {
       lbMap = AffineMap::get(numMapDims, numMapSymbols, expr);
-      ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + 1);
+      ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + ubAdjustment);
     } else {
       // TODO: Whenever there are local identifiers in the dependence
       // constraints, we'll conservatively over-approximate, since we don't
@@ -1118,9 +1121,10 @@ void FlatAffineValueConstraints::getSliceBounds(
                    << "WARNING: Potentially over-approximating slice ub\n");
         auto ubConst = getConstantBound(BoundType::UB, pos + offset);
         if (ubConst.hasValue()) {
-          (ubMap) = AffineMap::get(
-              numMapDims, numMapSymbols,
-              getAffineConstantExpr(ubConst.getValue() + 1, context));
+          ubMap =
+              AffineMap::get(numMapDims, numMapSymbols,
+                             getAffineConstantExpr(
+                                 ubConst.getValue() + ubAdjustment, context));
         }
       }
     }
@@ -1158,10 +1162,13 @@ LogicalResult FlatAffineValueConstraints::flattenAlignedMapAndMergeLocals(
 }
 
 LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos,
-                                                   AffineMap boundMap) {
+                                                   AffineMap boundMap,
+                                                   bool isClosedBound) {
   assert(boundMap.getNumDims() == getNumDimIds() && "dim mismatch");
   assert(boundMap.getNumSymbols() == getNumSymbolIds() && "symbol mismatch");
   assert(pos < getNumDimAndSymbolIds() && "invalid position");
+  assert((type != BoundType::EQ || isClosedBound) &&
+         "EQ bound must be closed.");
 
   // Equality follows the logic of lower bound except that we add an equality
   // instead of an inequality.
@@ -1193,17 +1200,24 @@ LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos,
     for (unsigned i = boundMap.getNumInputs(); i < end; i++, j++) {
       ineq[j] = lower ? -flatExpr[i] : flatExpr[i];
     }
+    // Make the bound closed in if flatExpr is open. The inequality is always
+    // created in the upper bound form, so the adjustment is -1.
+    int64_t boundAdjustment = (isClosedBound || type == BoundType::EQ) ? 0 : -1;
     // Constant term.
-    ineq[getNumCols() - 1] =
-        lower ? -flatExpr[flatExpr.size() - 1]
-              // Upper bound in flattenedExpr is an exclusive one.
-              : flatExpr[flatExpr.size() - 1] - 1;
+    ineq[getNumCols() - 1] = (lower ? -flatExpr[flatExpr.size() - 1]
+                                    : flatExpr[flatExpr.size() - 1]) +
+                             boundAdjustment;
     type == BoundType::EQ ? addEquality(ineq) : addInequality(ineq);
   }
 
   return success();
 }
 
+LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos,
+                                                   AffineMap boundMap) {
+  return addBound(type, pos, boundMap, /*isClosedBound=*/type != BoundType::UB);
+}
+
 AffineMap FlatAffineValueConstraints::computeAlignedMap(AffineMap map,
                                                         ValueRange operands) const {
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 87af3b9..bfd2c68 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -177,7 +177,8 @@ SmallVector<Value> getDynOperands(Location loc, Value val, OpBuilder &b) {
 }
 
 void getUpperBoundForIndex(Value value, AffineMap &boundMap,
-                           SmallVectorImpl<Value> &boundOperands) {
+                           SmallVectorImpl<Value> &boundOperands,
+                           bool constantRequired) {
   // Initialize `boundMap` and `boundOperands` to the identity returning
   // `value`. This combination is the default result of the method if no
   // simplification is possible.
@@ -226,11 +227,13 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap,
     if (!(llvm::all_of(op->getResults(), findOrCreateId) &&
          llvm::all_of(op->getOperands(), findOrCreateId)))
       return;
+    // Add AffineApplyOps to the constraints.
     if (auto applyOp = dyn_cast<AffineApplyOp>(op)) {
-      AffineValueMap valueMap(applyOp.getAffineMap(), applyOp.getOperands(),
-                              applyOp.getResult());
-      if (failed(constraints.composeMap(&valueMap)))
+      AffineMap map = constraints.computeAlignedMap(applyOp.getAffineMap(),
+                                                    applyOp.getOperands());
+      if (failed(constraints.addBound(IntegerPolyhedron::EQ,
+                                      getPosition(applyOp.getResult()), map)))
         return;
       continue;
     }
@@ -239,17 +242,30 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap,
       AffineMap map = constraints.computeAlignedMap(minOp.getAffineMap(),
                                                     minOp.getOperands());
       if (failed(constraints.addBound(IntegerPolyhedron::UB,
-                                      getPosition(minOp.getResult()), map)))
+                                      getPosition(minOp.getResult()), map,
+                                      /*isClosedBound=*/true)))
         return;
     }
   }
 
   // Obtain an upper bound for the affine index computation by projecting out
   // all temporary results and expressing the upper bound for `value` in terms
   // of the terminals of the index computation.
-  SmallVector<AffineMap> lowerBounds(1), upperBounds(1);
-  constraints.getSliceBounds(getPosition(value), 1, value.getContext(),
-                             &lowerBounds, &upperBounds);
+  unsigned pos = getPosition(value);
+  if (constantRequired) {
+    auto ubConst = constraints.getConstantBound(
+        FlatAffineValueConstraints::BoundType::UB, pos);
+    if (!ubConst.hasValue())
+      return;
+    boundMap =
+        AffineMap::getConstantMap(ubConst.getValue(), value.getContext());
+    return;
+  }
+
+  SmallVector<AffineMap> lowerBounds(1), upperBounds(1);
+  constraints.getSliceBounds(pos, 1, value.getContext(), &lowerBounds,
+                             &upperBounds,
+                             /*getClosedUB=*/true);
 
   // Verify `upperBounds[0]` is valid and has at least one result.
   if (!upperBounds[0] || upperBounds[0].getNumResults() == 0)
     return;
@@ -265,7 +281,8 @@ FailureOr<int64_t> getConstantUpperBoundForIndex(Value value) {
   // Compute an upper bound for `value`.
   AffineMap boundMap;
   SmallVector<Value> boundOperands;
-  getUpperBoundForIndex(value, boundMap, boundOperands);
+  getUpperBoundForIndex(value, boundMap, boundOperands,
+                        /*constantRequired=*/true);
 
   // Search the results of `boundMap` for constant upper bounds.
 SmallVector<int64_t> constantBounds;
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
index e37f0fc..0e0e2e1 100644
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -3,6 +3,7 @@
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,0.:f32 pack-paddings=0,1 padding-dimensions=0,1,2 run-enable-pass=false" -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 pack-paddings=0,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL-MATMUL
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32 pack-paddings=1,1,0 padding-dimensions=0,1,2 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1 pack-paddings=1,1,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=PARTIAL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.depthwise_conv_2d_nhwc_hwc pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=1,2 pack-paddings=1,0,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=DEPTHWISE_CONV_2D
 
 // MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 12, 7)>
 // MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
@@ -535,3 +536,65 @@ func.func @padding_the_output_dims_only(%arg0: tensor<24x12xf32>,
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [%0, %0] [1, 1] : tensor<?x?xf32> into tensor<24x25xf32>
   func.return %5 : tensor<24x25xf32>
 }
+
+// -----
+
+// DEPTHWISE_CONV_2D-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (4, -s0 + 11)>
+// DEPTHWISE_CONV_2D-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (s0 * 2)>
+// DEPTHWISE_CONV_2D-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<()[s0] -> (s0 * 2 + 1)>
+// DEPTHWISE_CONV_2D-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<()[s0] -> (s0 * -2 + 8)>
+// DEPTHWISE_CONV_2D-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 4)>
+
+#map0 = affine_map<()[s0] -> (4, -s0 + 11)>
+#map1 = affine_map<()[s0] -> (s0 * 2)>
+#map2 = affine_map<()[s0] -> (s0 * 2 + 1)>
+
+// DEPTHWISE_CONV_2D: depthwise_conv_2d_padding
+// DEPTHWISE_CONV_2D-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<1x23x3x16xf32>
+// DEPTHWISE_CONV_2D-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<3x3x16xf32>
+// DEPTHWISE_CONV_2D-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<1x13x1x16xf32>
+// DEPTHWISE_CONV_2D-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
+func.func @depthwise_conv_2d_padding(%arg0: tensor<1x23x3x16xf32>,
+                                     %arg1: tensor<3x3x16xf32>,
+                                     %arg2: tensor<1x13x1x16xf32>,
+                                     %iv0: index) -> tensor<1x?x1x16xf32> {
+  // DEPTHWISE_CONV_2D-DAG: %[[CST:.*]] = arith.constant 0.
+  // DEPTHWISE_CONV_2D-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // DEPTHWISE_CONV_2D-DAG: %[[T0:.*]] = affine.min #[[MAP0]]()[%[[IV0]]]
+  %0 = affine.min #map0()[%iv0]
+  %1 = affine.apply #map1()[%iv0]
+  %2 = affine.apply #map2()[%0]
+
+  // DEPTHWISE_CONV_2D: %[[T3:.*]] = tensor.extract_slice %[[ARG0]]
+  // DEPTHWISE_CONV_2D: %[[T4:.*]] = tensor.extract_slice %[[ARG2]]
+  %3 = tensor.extract_slice %arg0[0, %1, 0, 0] [1, %2, 3, 16] [1, 1, 1, 1] : tensor<1x23x3x16xf32> to tensor<1x?x3x16xf32>
+  %4 = tensor.extract_slice %arg2[0, %iv0, 0, 0] [1, %0, 1, 16] [1, 1, 1, 1] : tensor<1x13x1x16xf32> to tensor<1x?x1x16xf32>
+
+  // Check the padding on the input.
+ // DEPTHWISE_CONV_2D: %[[T5:.*]] = affine.apply #[[MAP3]]()[%[[T0]]] + // DEPTHWISE_CONV_2D: %[[T6:.*]] = tensor.pad %[[T3]] + // DEPTHWISE_CONV_2D-SAME: low[%[[C0]], %[[C0]], %[[C0]], %[[C0]]] + // DEPTHWISE_CONV_2D-SAME: high[%[[C0]], %[[T5]], %[[C0]], %[[C0]]] + // DEPTHWISE_CONV_2D: tensor.yield %[[CST]] : f32 + + // Check the padding on the output. + // DEPTHWISE_CONV_2D: %[[T7:.*]] = affine.apply #[[MAP4]]()[%[[T0]]] + // DEPTHWISE_CONV_2D: %[[T8:.*]] = tensor.pad %[[T4]] + // DEPTHWISE_CONV_2D-SAME: low[%[[C0]], %[[C0]], %[[C0]], %[[C0]]] + // DEPTHWISE_CONV_2D-SAME: high[%[[C0]], %[[T7]], %[[C0]], %[[C0]]] + // DEPTHWISE_CONV_2D: tensor.yield %[[CST]] : f32 + + // DEPTHWISE_CONV_2D: %[[T9:.*]] = linalg.depthwise_conv_2d_nhwc_hwc + // DEPTHWISE_CONV_2D-SAME: ins(%[[T6]], %[[ARG1]] : tensor<1x9x3x16xf32>, tensor<3x3x16xf32>) + // DEPTHWISE_CONV_2D-SAME: outs(%[[T8]] : tensor<1x4x1x16xf32>) + %5 = linalg.depthwise_conv_2d_nhwc_hwc + {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} + ins(%3, %arg1 : tensor<1x?x3x16xf32>, tensor<3x3x16xf32>) + outs(%4 : tensor<1x?x1x16xf32>) -> tensor<1x?x1x16xf32> + + // Check the extract_slice to crop the padded output before return. + // DEPTHWISE_CONV_2D: %[[T10:.*]] = tensor.extract_slice %[[T9]][0, 0, 0, 0] + // DEPTHWISE_CONV_2D-SAME: [1, %[[T0]], 1, 16] + // DEPTHWISE_CONV_2D: return %[[T10]] : tensor<1x?x1x16xf32> + return %5 : tensor<1x?x1x16xf32> +} -- 2.7.4
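
For reference, a minimal sketch of the bound arithmetic that the fixed
getUpperBoundForIndex performs for the new @depthwise_conv_2d_padding test
above (the maps and values mirror the test; the snippet is illustrative and
not part of the patch):

```
// Tile size along the padded dimension; with the closed bound, %0 <= 4.
%0 = affine.min affine_map<()[s0] -> (4, -s0 + 11)>()[%iv0]
// Input slice size for stride 2; the closed bound gives %2 <= 2 * 4 + 1 = 9.
%2 = affine.apply affine_map<()[s0] -> (s0 * 2 + 1)>()[%0]
```

The constant bounds 9 and 4 are what make the padded operands static, matching
the tensor<1x9x3x16xf32> input and tensor<1x4x1x16xf32> output in the
DEPTHWISE_CONV_2D check lines; with the previous open-bound handling the
derived input size would have been too small, as in the commit-message example.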