#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
-
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
-using mlir::edsc::intrinsics::AffineIndexedValue;
-using mlir::edsc::intrinsics::MemRefIndexedValue;
-
namespace mlir {
class AffineExpr;
class AffineForOp;
namespace linalg {
class LinalgDependenceGraph;
-/// A struct containing the Linalg producer before and after fusion.
-/// When operating on tensors, `fusedProducer` may feed into a `tensor.cast` op
-/// before the consumer Linalg op, until enough canonicalizations have applied.
-struct FusionInfo {
- LinalgOp originalProducer;
- LinalgOp fusedProducer;
-};
+//===----------------------------------------------------------------------===//
+// General utilities
+//===----------------------------------------------------------------------===//
-/// A struct containing common matchers over linalg op's region.
-struct RegionMatcher {
- enum class BinaryOpKind {
- IAdd,
- };
+/// Apply the permutation defined by `permutation` to `inVec`.
+/// Element `permutation[i]` in `inVec` is mapped to location `i` in the
+/// result, i.e. entry `i` of the result is `inVec[permutation[i]]`.
+/// E.g.: for an input vector `inVec = ['a', 'b', 'c']` and a permutation
+/// vector `permutation = [2, 0, 1]`, this function leaves
+/// `inVec = ['c', 'a', 'b']`.
+template <typename T, unsigned N>
+void applyPermutationToVector(SmallVector<T, N> &inVec,
+ ArrayRef<unsigned> permutation) {
+ SmallVector<T, N> auxVec(inVec.size());
+ for (unsigned i = 0; i < permutation.size(); ++i)
+ auxVec[i] = inVec[permutation[i]];
+ inVec = auxVec;
+}
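+//
+// Illustrative usage sketch:
+//   SmallVector<int, 4> vec = {10, 20, 30};
+//   applyPermutationToVector(vec, {2, 0, 1}); // vec == {30, 10, 20}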
- /// Matches the given linalg op if its body is performing binary operation on
- /// int or float scalar values and returns the binary op kind.
- ///
- /// The linalg op's region is expected to be
- /// ```
- /// {
- /// ^bb(%a: <scalar-type>, %b: <scalar-type>):
- /// %0 = <binary-op> %a, %b: <scalar-type>
- /// linalg.yield %0: <scalar-type>
- /// }
- /// ```
- static Optional<BinaryOpKind> matchAsScalarBinaryOp(GenericOp op);
-};
+/// If `size` comes from an AffineMinOp and one of the values of AffineMinOp
+/// is a constant, returns an IntegerAttr holding the smallest such constant.
+/// If `size` comes from a ConstantOp, returns its constant value.
+/// Otherwise returns nullptr.
+IntegerAttr getSmallestBoundingIndex(Value size);
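+// Illustrative example (value names assumed): given
+//   %sz = affine.min affine_map<(d0) -> (42, d0)>(%n)
+// getSmallestBoundingIndex(%sz) returns an IntegerAttr holding 42; given
+//   %sz = constant 7 : index
+// it returns an IntegerAttr holding 7.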
+
+//===----------------------------------------------------------------------===//
+// Iterator type utilities
+//===----------------------------------------------------------------------===//
/// Checks if an iterator_type attribute is parallel.
bool isParallelIteratorType(Attribute attr);
/// Checks if an iterator_type attribute is window.
bool isWindowIteratorType(Attribute attr);
+//===----------------------------------------------------------------------===//
+// Fusion utilities
+//===----------------------------------------------------------------------===//
+
/// Checks whether the specific `producer` is the last write to exactly the
/// whole `consumedView`. This checks structural dominance, i.e. that the
/// dependence is a RAW without any interleaved write to any piece of
/// `consumedView`.
bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer,
Value consumedView, LinalgOp producer);
+/// Creates subtensor/subview ops for all `tiledOperands` of the given
+/// `linalgOp` with `builder`, assuming `linalgOp` is being fused into a loop
+/// nest for tiling with the given induction variables `ivs` and tile sizes
+/// `tileSizes`. `sizeBounds` are the iteration space bounds for *all* the
+/// implicit loops in `linalgOp`.
+///
+/// Note that a constant zero in `tileSizes` means no tiling at that implicit
+/// loop. The number of non-zero values in `tileSizes` should be equal to the
+/// number of values in `ivs`.
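+///
+/// Illustrative example: with `tileSizes = [%c2, %c0, %c8]`, only the first
+/// and third implicit loops are tiled, so `ivs` must contain exactly the two
+/// induction variables of the generated tile loops.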
+SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
+ LinalgOp linalgOp,
+ ArrayRef<Value> tiledOperands,
+ ValueRange ivs, ValueRange tileSizes,
+ ArrayRef<Value> sizeBounds);
+
using FusableOpDependencesTy = llvm::MapVector<
Operation *,
SmallVector<LinalgDependenceGraph::LinalgDependenceGraphElem, 1>>;
/// Finds all fusable dependences among `ops` according to `dependenceGraph`.
FusableOpDependencesTy
findAllFusableDependences(ArrayRef<LinalgOp> ops,
                          const LinalgDependenceGraph &dependenceGraph);
+/// A struct containing the Linalg producer before and after fusion.
+/// When operating on tensors, `fusedProducer` may feed into a `tensor.cast`
+/// op before the consumer Linalg op, until enough canonicalizations have been
+/// applied.
+struct FusionInfo {
+ LinalgOp originalProducer;
+ LinalgOp fusedProducer;
+};
+
/// Fuses the producer of `consumerOpOperand` into its consumer if the fusion
/// is structurally feasible and would not violate any dependencies.
/// Implements the fusion part of the "tileAndFuse on tensors" transformation;
/// on success, returns the result values of the fused consumer op.
Optional<SmallVector<Value, 1>> fuseTensorOps(PatternRewriter &rewriter,
OpOperand &consumerOpOperand);
-/// Apply the permutation defined by `permutation` to `inVec`.
-/// Element `i` in `inVec` is mapped to location `j = permutation[i]`.
-/// E.g.: for an input vector `inVec = ['a', 'b', 'c']` and a permutation vector
-/// `permutation = [2, 0, 1]`, this function leaves `inVec = ['c', 'a', 'b']`.
-template <typename T, unsigned N>
-void applyPermutationToVector(SmallVector<T, N> &inVec,
- ArrayRef<unsigned> permutation) {
- SmallVector<T, N> auxVec(inVec.size());
- for (unsigned i = 0; i < permutation.size(); ++i)
- auxVec[i] = inVec[permutation[i]];
- inVec = auxVec;
-}
-
-/// If `size` comes from an AffineMinOp and one of the values of AffineMinOp
-/// is a constant then return a new value set to the smallest such constant.
-/// If `size` comes from a ConstantOp, return the constant.
-/// Otherwise return nullptr.
-IntegerAttr getSmallestBoundingIndex(Value size);
+//===----------------------------------------------------------------------===//
+// Distribution utilities
+//===----------------------------------------------------------------------===//
/// Scheme used to distribute loops to processors.
enum class DistributionMethod {
  /// Cyclic distribution where no assumption is made about the relationship
  /// between the number of processors and the number of iterations.
  Cyclic = 0,
  /// Cyclic distribution where the number of processors can be assumed to be
  /// greater than or equal to the number of iterations.
  CyclicNumProcsGeNumIters = 1,
  /// Cyclic distribution where the number of processors can be assumed to be
  /// equal to the number of iterations.
  CyclicNumProcsEqNumIters = 2
};

/// Options used to distribute the loops generated by Linalg transformations
/// to processors.
struct LinalgLoopDistributionOptions {
  /// Distribution method to use for each of the generated parallel loops; one
  /// entry per distributed loop.
  SmallVector<DistributionMethod, 0> distributionMethod = {};
};
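
// Illustrative usage sketch (assuming the options struct above): distribute
// the two outermost generated parallel loops cyclically across processors:
//   LinalgLoopDistributionOptions options;
//   options.distributionMethod = {DistributionMethod::Cyclic,
//                                 DistributionMethod::Cyclic};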
+//===----------------------------------------------------------------------===//
+// Generic op region utilities
+//===----------------------------------------------------------------------===//
+
+/// A struct containing common matchers over linalg op's region.
+struct RegionMatcher {
+ enum class BinaryOpKind {
+ IAdd,
+ };
+
+  /// Matches the given linalg op if its body is performing a binary operation
+  /// on int or float scalar values and returns the binary op kind.
+ ///
+ /// The linalg op's region is expected to be
+ /// ```
+ /// {
+ /// ^bb(%a: <scalar-type>, %b: <scalar-type>):
+ /// %0 = <binary-op> %a, %b: <scalar-type>
+ /// linalg.yield %0: <scalar-type>
+ /// }
+ /// ```
+ static Optional<BinaryOpKind> matchAsScalarBinaryOp(GenericOp op);
+};
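+//
+// Illustrative usage sketch (`genericOp` is an assumed GenericOp value):
+//   Optional<RegionMatcher::BinaryOpKind> kind =
+//       RegionMatcher::matchAsScalarBinaryOp(genericOp);
+//   if (kind && *kind == RegionMatcher::BinaryOpKind::IAdd) {
+//     // The op's region reduces to a scalar integer addition.
+//   }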
+
+//===----------------------------------------------------------------------===//
+// Loop nest utilities
+//===----------------------------------------------------------------------===//
+
/// Utility class used to generate nested loops with ranges described by
/// `loopRanges` and loop type described by the `iteratorTypes`. `bodyBuilderFn`
/// is used to generate the body of the innermost loop. It is passed a range
/// of loop induction variables and a range of iteration arguments.
template <typename LoopTy>
struct GenerateLoopNest {
using IndexedValueTy =
typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
- AffineIndexedValue, MemRefIndexedValue>::type;
+ edsc::intrinsics::AffineIndexedValue,
+ edsc::intrinsics::MemRefIndexedValue>::type;
static void
doit(ArrayRef<Range> loopRanges, ValueRange iterArgInitValues,
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AffineExpr.h"
-#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/Transforms/FoldUtils.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
Range{std_constant_index(0), shapeSizes[idx], tileSizes[idx]});
return std::make_tuple(res, loopIndexToRangeIndex);
}
-namespace {
-
-// Helper visitor to determine whether an AffineExpr is tiled.
-// This is achieved by traversing every AffineDimExpr with position `pos` and
-// checking whether the corresponding `tileSizes[pos]` is non-zero.
-// This also enforces only positive coefficients occur in multiplications.
-//
-// Example:
-// `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
-//
-struct TileCheck : public AffineExprVisitor<TileCheck> {
- TileCheck(ValueRange tileSizes) : isTiled(false), tileSizes(tileSizes) {}
-
- void visitDimExpr(AffineDimExpr expr) {
- isTiled |= !isZero(tileSizes[expr.getPosition()]);
- }
- void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
- visit(expr.getLHS());
- visit(expr.getRHS());
- if (expr.getKind() == mlir::AffineExprKind::Mul)
- assert(expr.getRHS().cast<AffineConstantExpr>().getValue() > 0 &&
- "nonpositive multiplying coefficient");
- }
- bool isTiled;
- ValueRange tileSizes;
-};
-
-} // namespace
// IndexedGenericOp explicitly uses induction variables in the loop body. The
// values of the indices that are used in the loop body for any given access of
}
}
-static bool isTiled(AffineExpr expr, ValueRange tileSizes) {
- if (!expr)
- return false;
- TileCheck t(tileSizes);
- t.visit(expr);
- return t.isTiled;
-}
-
-// Checks whether the `map varies with respect to a non-zero `tileSize`.
-static bool isTiled(AffineMap map, ValueRange tileSizes) {
- if (!map)
- return false;
- for (unsigned r = 0; r < map.getNumResults(); ++r)
- if (isTiled(map.getResult(r), tileSizes))
- return true;
- return false;
-}
-
-static SmallVector<Value, 4>
-makeTiledShapes(OpBuilder &b, Location loc, LinalgOp linalgOp,
- ArrayRef<Value> tiledOperands, AffineMap map, ValueRange ivs,
- ValueRange tileSizes, ValueRange allShapeSizes) {
- assert(ivs.size() == static_cast<size_t>(llvm::count_if(
- llvm::make_range(tileSizes.begin(), tileSizes.end()),
- [](Value v) { return !isZero(v); })) &&
- "expected as many ivs as non-zero sizes");
-
- using namespace edsc::op;
-
- auto shapeSizes = applyMapToValues(b, loc, map, allShapeSizes);
- // Construct (potentially temporary) mins and maxes on which to apply maps
- // that define tile subshapes.
- SmallVector<Value, 8> lbs, subShapeSizes;
- for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
- bool isTiled = !isZero(tileSizes[idx]);
- lbs.push_back(isTiled ? ivs[idxIvs++] : (Value)std_constant_index(0));
- // Before composing, we need to make range a closed interval.
- Value size = isTiled ? tileSizes[idx] : shapeSizes[idx];
- subShapeSizes.push_back(size - std_constant_index(1));
- }
-
- SmallVector<Value, 4> res;
- res.reserve(tiledOperands.size());
- for (auto en : llvm::enumerate(tiledOperands)) {
- Value shapedOp = en.value();
- ShapedType shapedType = shapedOp.getType().cast<ShapedType>();
- unsigned rank = shapedType.getRank();
- AffineMap map = linalgOp.getIndexingMap(en.index());
- // If the shape is not tiled, we can use it as is.
- if (!isTiled(map, tileSizes)) {
- res.push_back(shapedOp);
- continue;
- }
-
- // Construct a new subview / subtensor for the tile.
- SmallVector<OpFoldResult, 4> offsets, sizes, strides;
- offsets.reserve(rank);
- sizes.reserve(rank);
- strides.reserve(rank);
- for (unsigned r = 0; r < rank; ++r) {
- if (!isTiled(map.getSubMap({r}), tileSizes)) {
- offsets.push_back(b.getIndexAttr(0));
- sizes.push_back(memref_dim(shapedOp, r).value);
- strides.push_back(b.getIndexAttr(1));
- continue;
- }
-
- // Tiling creates a new slice at the proper index, the slice step is 1
- // (i.e. the op does not subsample, stepping occurs in the loop).
- auto m = map.getSubMap({r});
- auto offset = applyMapToValues(b, loc, m, lbs).front();
- offsets.push_back(offset);
- auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front();
- // Resulting size needs to be made half open interval again.
- auto size = closedIntSize + std_constant_index(1);
-
- // The size of the subview / subtensor should be trimmed to avoid
- // out-of-bounds accesses, unless we statically know the subshape size
- // divides the shape size evenly.
- int64_t shapeSize = shapedType.getDimSize(r);
- auto sizeCst = size.getDefiningOp<ConstantIndexOp>();
- if (ShapedType::isDynamic(shapeSize) || !sizeCst ||
- (shapeSize % sizeCst.getValue()) != 0) {
- // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
- auto minMap = AffineMap::get(
- /*dimCount=*/3, /*symbolCount=*/0,
- {getAffineDimExpr(/*position=*/0, b.getContext()),
- getAffineDimExpr(/*position=*/1, b.getContext()) -
- getAffineDimExpr(/*position=*/2, b.getContext())},
- b.getContext());
- Value d = memref_dim(shapedOp, r);
- SmallVector<Value, 4> operands{size, d, offset};
- fullyComposeAffineMapAndOperands(&minMap, &operands);
- size = affine_min(b.getIndexType(), minMap, operands);
- }
-
- sizes.push_back(size);
- strides.push_back(b.getIndexAttr(1));
- }
-
- if (shapedType.isa<MemRefType>())
- res.push_back(
- b.create<memref::SubViewOp>(loc, shapedOp, offsets, sizes, strides));
- else
- res.push_back(
- b.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
- }
-
- return res;
-}
-
template <typename LoopTy>
static Optional<TiledLinalgOp>
tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes,
assert(outputBuffers.empty() || iterArgs.empty());
operands.append(outputBuffers.begin(), outputBuffers.end());
operands.append(iterArgs.begin(), iterArgs.end());
- SmallVector<Value, 4> tiledOperands =
- makeTiledShapes(b, loc, op, operands, shapeSizesToLoopsMap,
- interchangedIvs, tileSizes, allShapeSizes);
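+        // The iteration space bounds are obtained by mapping all shape sizes
+        // into the loop space; makeTiledShapes uses them to clamp tiles at
+        // the boundaries.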
+ auto sizeBounds =
+ applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
+ SmallVector<Value, 4> tiledOperands = makeTiledShapes(
+ b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
auto nonShapedOperands = op.getAssumedNonShapedOperands();
tiledOperands.append(nonShapedOperands.begin(),
nonShapedOperands.end());
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
+#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/Transforms/LoopUtils.h"
using namespace mlir;
+using namespace mlir::edsc;
+using namespace mlir::edsc::intrinsics;
using namespace mlir::linalg;
using namespace mlir::scf;
+static bool isZero(Value v) {
+ if (auto cst = v.getDefiningOp<ConstantIndexOp>())
+ return cst.getValue() == 0;
+ return false;
+}
+
+namespace {
+
+// Helper visitor to determine whether an AffineExpr is tiled.
+// This is achieved by traversing every AffineDimExpr with position `pos` and
+// checking whether the corresponding `tileSizes[pos]` is non-zero.
+// This also enforces that only positive coefficients occur in multiplications.
+//
+// Example:
+// `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
+//
+struct TileCheck : public AffineExprVisitor<TileCheck> {
+ TileCheck(ValueRange tileSizes) : isTiled(false), tileSizes(tileSizes) {}
+
+ void visitDimExpr(AffineDimExpr expr) {
+ isTiled |= !isZero(tileSizes[expr.getPosition()]);
+ }
+ void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
+ visit(expr.getLHS());
+ visit(expr.getRHS());
+ if (expr.getKind() == mlir::AffineExprKind::Mul)
+ assert(expr.getRHS().cast<AffineConstantExpr>().getValue() > 0 &&
+ "nonpositive multiplying coefficient");
+ }
+ bool isTiled;
+ ValueRange tileSizes;
+};
+
+} // namespace
+
+static bool isTiled(AffineExpr expr, ValueRange tileSizes) {
+ if (!expr)
+ return false;
+ TileCheck t(tileSizes);
+ t.visit(expr);
+ return t.isTiled;
+}
+
+// Checks whether `map` varies with respect to a non-zero entry of `tileSizes`.
+static bool isTiled(AffineMap map, ValueRange tileSizes) {
+ if (!map)
+ return false;
+ for (unsigned r = 0; r < map.getNumResults(); ++r)
+ if (isTiled(map.getResult(r), tileSizes))
+ return true;
+ return false;
+}
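+//
+// Illustrative example: for `affine_map<(d0, d1) -> (d0, d1)>` and
+// `tileSizes = [%c0, %c4]`, the second result `d1` has a non-zero tile size,
+// so `isTiled(map, tileSizes)` returns true.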
+
Optional<RegionMatcher::BinaryOpKind>
RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
  auto &region = op.region();
assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
}
+SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
+ LinalgOp linalgOp,
+ ArrayRef<Value> tiledOperands,
+ ValueRange ivs, ValueRange tileSizes,
+ ArrayRef<Value> sizeBounds) {
+ assert(ivs.size() == static_cast<size_t>(llvm::count_if(
+ llvm::make_range(tileSizes.begin(), tileSizes.end()),
+ [](Value v) { return !isZero(v); })) &&
+ "expected as many ivs as non-zero sizes");
+
+ using namespace edsc::op;
+
+ // Construct (potentially temporary) mins and maxes on which to apply maps
+ // that define tile subshapes.
+ SmallVector<Value, 8> lbs, subShapeSizes;
+ for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
+ bool isTiled = !isZero(tileSizes[idx]);
+ lbs.push_back(isTiled ? ivs[idxIvs++] : (Value)std_constant_index(0));
+    // Before composing, we need to make the range a closed interval.
+ Value size = isTiled ? tileSizes[idx] : sizeBounds[idx];
+ subShapeSizes.push_back(size - std_constant_index(1));
+ }
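+  // Illustrative example: with `tileSizes = [%c4, %c0]` and
+  // `sizeBounds = [%s0, %s1]`, the loop above produces `lbs = [%iv0, %c0]`
+  // and the closed-interval sizes `subShapeSizes = [%c4 - 1, %s1 - 1]`.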
+
+ MLIRContext *context = builder.getContext();
+ SmallVector<Value, 4> tiledShapes;
+ tiledShapes.reserve(tiledOperands.size());
+ for (auto en : llvm::enumerate(tiledOperands)) {
+ Value shapedOp = en.value();
+ ShapedType shapedType = shapedOp.getType().cast<ShapedType>();
+ unsigned rank = shapedType.getRank();
+ AffineMap map = linalgOp.getIndexingMap(en.index());
+ // If the shape is not tiled, we can use it as is.
+ if (!isTiled(map, tileSizes)) {
+ tiledShapes.push_back(shapedOp);
+ continue;
+ }
+
+ // Construct a new subview / subtensor for the tile.
+ SmallVector<OpFoldResult, 4> offsets, sizes, strides;
+ offsets.reserve(rank);
+ sizes.reserve(rank);
+ strides.reserve(rank);
+ for (unsigned r = 0; r < rank; ++r) {
+ if (!isTiled(map.getSubMap({r}), tileSizes)) {
+ offsets.push_back(builder.getIndexAttr(0));
+ sizes.push_back(memref_dim(shapedOp, r).value);
+ strides.push_back(builder.getIndexAttr(1));
+ continue;
+ }
+
+ // Tiling creates a new slice at the proper index, the slice step is 1
+ // (i.e. the op does not subsample, stepping occurs in the loop).
+ auto m = map.getSubMap({r});
+ auto offset = applyMapToValues(builder, loc, m, lbs).front();
+ offsets.push_back(offset);
+ auto closedIntSize =
+ applyMapToValues(builder, loc, m, subShapeSizes).front();
+      // The resulting size needs to be made a half-open interval again.
+ auto size = closedIntSize + std_constant_index(1);
+
+ // The size of the subview / subtensor should be trimmed to avoid
+ // out-of-bounds accesses, unless we statically know the subshape size
+ // divides the shape size evenly.
+ int64_t shapeSize = shapedType.getDimSize(r);
+ auto sizeCst = size.getDefiningOp<ConstantIndexOp>();
+ if (ShapedType::isDynamic(shapeSize) || !sizeCst ||
+ (shapeSize % sizeCst.getValue()) != 0) {
+ AffineExpr dim0, dim1, dim2;
+ bindDims(context, dim0, dim1, dim2);
+ // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
+ auto minMap = AffineMap::get(
+ /*dimCount=*/3, /*symbolCount=*/0, {dim0, dim1 - dim2}, context);
+ Value d = memref_dim(shapedOp, r);
+ SmallVector<Value, 4> operands{size, d, offset};
+ fullyComposeAffineMapAndOperands(&minMap, &operands);
+ size = affine_min(builder.getIndexType(), minMap, operands);
+ }
+
+ sizes.push_back(size);
+ strides.push_back(builder.getIndexAttr(1));
+ }
+
+ if (shapedType.isa<MemRefType>())
+ tiledShapes.push_back(builder.create<memref::SubViewOp>(
+ loc, shapedOp, offsets, sizes, strides));
+ else
+ tiledShapes.push_back(
+ builder.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
+ }
+
+ return tiledShapes;
+}
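+//
+// Illustrative sketch (names assumed) of the IR generated for one operand
+// %A : memref<?x?xf32> when tiling its rows by 4:
+//   %d0 = memref.dim %A, %c0 : memref<?x?xf32>
+//   %d1 = memref.dim %A, %c1 : memref<?x?xf32>
+//   %sz = affine.min affine_map<(d0)[s0] -> (4, s0 - d0)>(%iv)[%d0]
+//   %sv = memref.subview %A[%iv, 0] [%sz, %d1] [1, 1]
+// where %iv is the induction variable of the tile loop.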
+
} // namespace linalg
} // namespace mlir