// indices of newly created loops.
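// For example, with four loops and tile sizes (0, 4, 0, 8), only loops 1 and
// 3 are tiled: two ranges are created and the returned map is {1 -> 0, 3 -> 1}.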
static std::tuple<SmallVector<Range, 4>, LoopIndexToRangeIndexMap>
makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map,
- ArrayRef<Value> allViewSizes,
- ArrayRef<Value> allTileSizes) {
+ ValueRange allShapeSizes, ValueRange allTileSizes) {
assert(allTileSizes.size() == map.getNumResults());
- // Apply `map` to get view sizes in loop order.
- auto viewSizes = applyMapToValues(b, loc, map, allViewSizes);
+ // Apply `map` to get shape sizes in loop order.
+ auto shapeSizes = applyMapToValues(b, loc, map, allShapeSizes);
SmallVector<Value, 4> tileSizes(allTileSizes.begin(), allTileSizes.end());
  // Traverse the tile sizes, which are in loop order, and erase zeros
  // everywhere.
LoopIndexToRangeIndexMap loopIndexToRangeIndex;
for (int idx = 0, e = tileSizes.size(), zerosCount = 0; idx < e; ++idx) {
if (isZero(tileSizes[idx - zerosCount])) {
- viewSizes.erase(viewSizes.begin() + idx - zerosCount);
+ shapeSizes.erase(shapeSizes.begin() + idx - zerosCount);
tileSizes.erase(tileSizes.begin() + idx - zerosCount);
++zerosCount;
      continue;
    }
    loopIndexToRangeIndex[idx] = idx - zerosCount;
  }

  // Create a new range with the applied tile sizes.
SmallVector<Range, 4> res;
for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
- res.push_back(Range{std_constant_index(0), viewSizes[idx], tileSizes[idx]});
+ res.push_back(
+ Range{std_constant_index(0), shapeSizes[idx], tileSizes[idx]});
return std::make_tuple(res, loopIndexToRangeIndex);
}
-
namespace {
// Helper visitor to determine whether an AffineExpr is tiled.
// `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
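// (i.e. an expression is tiled iff it references at least one dimension whose
// tile size is non-zero).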
//
struct TileCheck : public AffineExprVisitor<TileCheck> {
- TileCheck(ArrayRef<Value> tileSizes) : isTiled(false), tileSizes(tileSizes) {}
+ TileCheck(ValueRange tileSizes) : isTiled(false), tileSizes(tileSizes) {}
  void visitDimExpr(AffineDimExpr expr) {
    isTiled |= !isZero(tileSizes[expr.getPosition()]);
  }
  void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
    visit(expr.getLHS());
    visit(expr.getRHS());
    if (expr.getKind() == mlir::AffineExprKind::Mul)
      assert(expr.getRHS().cast<AffineConstantExpr>().getValue() > 0 &&
             "nonpositive multiplying coefficient");
  }
bool isTiled;
- ArrayRef<Value> tileSizes;
+ ValueRange tileSizes;
};
} // namespace
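// If `op` is an IndexedGenericOp, tiling shifts its iteration space: offset
// each index block argument by the induction variable of the corresponding
// newly created loop so the body keeps observing the same index values.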
static void transformIndexedGenericOpIndices(
OpBuilder &b, LinalgOp op, SmallVectorImpl<Value> &ivs,
const LoopIndexToRangeIndexMap &loopIndexToRangeIndex) {
- assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics");
auto indexedGenericOp = dyn_cast<IndexedGenericOp>(op.getOperation());
if (!indexedGenericOp)
    return;

  // Offset each index block argument by the induction variable of the
  // corresponding tiled loop and replace all other uses of the old index.
  Block &block = indexedGenericOp.region().front();
  OpBuilder::InsertionGuard g(b);
  b.setInsertionPointToStart(&block);
  for (unsigned i = 0; i < indexedGenericOp.getNumLoops(); ++i) {
    auto rangeIndex = loopIndexToRangeIndex.find(i);
    if (rangeIndex == loopIndexToRangeIndex.end())
      continue;
    Value oldIndex = block.getArgument(i);
    Value newIndex = b.create<AddIOp>(indexedGenericOp.getLoc(), oldIndex,
                                      ivs[rangeIndex->second]);
    for (auto &use : llvm::make_early_inc_range(oldIndex.getUses()))
      if (use.getOwner() != newIndex.getDefiningOp())
        use.set(newIndex);
  }
}
-static bool isTiled(AffineExpr expr, ArrayRef<Value> tileSizes) {
+static bool isTiled(AffineExpr expr, ValueRange tileSizes) {
if (!expr)
return false;
  TileCheck t(tileSizes);
  t.visit(expr);
  return t.isTiled;
}
-// Checks whether the view with index `viewIndex` within `linalgOp` varies with
-// respect to a non-zero `tileSize`.
-static bool isTiled(AffineMap map, ArrayRef<Value> tileSizes) {
+// Checks whether the `map` varies with respect to a non-zero `tileSize`.
+static bool isTiled(AffineMap map, ValueRange tileSizes) {
if (!map)
return false;
  for (unsigned r = 0; r < map.getNumResults(); ++r)
    if (isTiled(map.getResult(r), tileSizes))
      return true;
  return false;
}
-static SmallVector<Value, 4> makeTiledViews(OpBuilder &b, Location loc,
- LinalgOp linalgOp, AffineMap map,
- ArrayRef<Value> ivs,
- ArrayRef<Value> tileSizes,
- ArrayRef<Value> allViewSizes) {
- assert(linalgOp.hasBufferSemantics() &&
- "expected linalg op with buffer semantics");
+static SmallVector<Value, 4>
+makeTiledShapes(OpBuilder &b, Location loc, LinalgOp linalgOp,
+ ValueRange operands, AffineMap map, ValueRange ivs,
+ ValueRange tileSizes, ValueRange allShapeSizes) {
+ assert(operands.size() == linalgOp.getShapedOperands().size());
assert(ivs.size() == static_cast<size_t>(llvm::count_if(
llvm::make_range(tileSizes.begin(), tileSizes.end()),
                 [](Value v) { return !isZero(v); })) &&
         "expected as many ivs as non-zero sizes");

  using namespace edsc::op;
- auto viewSizes = applyMapToValues(b, loc, map, allViewSizes);
+ auto shapeSizes = applyMapToValues(b, loc, map, allShapeSizes);
// Construct (potentially temporary) mins and maxes on which to apply maps
- // that define tile subviews.
- SmallVector<Value, 8> lbs, subViewSizes;
+ // that define tile subshapes.
+ SmallVector<Value, 8> lbs, subShapeSizes;
for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
bool isTiled = !isZero(tileSizes[idx]);
lbs.push_back(isTiled ? ivs[idxIvs++] : (Value)std_constant_index(0));
// Before composing, we need to make range a closed interval.
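    // I.e. store `size - 1`, the largest valid offset, so that composed
    // affine maps compute the last accessed index; the result is turned back
    // into a half-open interval below.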
- Value size = isTiled ? tileSizes[idx] : viewSizes[idx];
- subViewSizes.push_back(size - std_constant_index(1));
+ Value size = isTiled ? tileSizes[idx] : shapeSizes[idx];
+ subShapeSizes.push_back(size - std_constant_index(1));
}
auto *op = linalgOp.getOperation();
SmallVector<Value, 4> res;
res.reserve(op->getNumOperands());
- auto viewIteratorBegin = linalgOp.getInputsAndOutputBuffers().begin();
- for (unsigned viewIndex = 0; viewIndex < linalgOp.getNumInputsAndOutputs();
- ++viewIndex) {
- Value view = *(viewIteratorBegin + viewIndex);
- auto viewType = view.getType().cast<MemRefType>();
- unsigned rank = viewType.getRank();
- auto mapAttr = linalgOp.indexing_maps()[viewIndex];
- auto map = mapAttr.cast<AffineMapAttr>().getValue();
- // If the view is not tiled, we can use it as is.
+ for (auto en : llvm::enumerate(operands)) {
+ Value shapedOp = en.value();
+ ShapedType shapedType = shapedOp.getType().cast<ShapedType>();
+ unsigned rank = shapedType.getRank();
+ AffineMap map = linalgOp.getIndexingMap(en.index());
+ // If the shape is not tiled, we can use it as is.
if (!isTiled(map, tileSizes)) {
- res.push_back(view);
+ res.push_back(shapedOp);
continue;
}
- // Construct a new subview for the tile.
+ // Construct a new subview / subtensor for the tile.
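+ // (Both ops take identical offset/size/stride operands; they differ only
+ // in the result type, memref vs tensor.)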
SmallVector<Value, 4> offsets, sizes, strides;
offsets.reserve(rank);
    sizes.reserve(rank);
    strides.reserve(rank);
for (unsigned r = 0; r < rank; ++r) {
if (!isTiled(map.getSubMap({r}), tileSizes)) {
offsets.push_back(std_constant_index(0));
- sizes.push_back(std_dim(view, r));
+ sizes.push_back(std_dim(shapedOp, r));
strides.push_back(std_constant_index(1));
continue;
}
// Tiling creates a new slice at the proper index, the slice step is 1
- // (i.e. the slice view does not subsample, stepping occurs in the loop).
+ // (i.e. the op does not subsample, stepping occurs in the loop).
auto m = map.getSubMap({r});
auto offset = applyMapToValues(b, loc, m, lbs).front();
offsets.push_back(offset);
- auto closedIntSize = applyMapToValues(b, loc, m, subViewSizes).front();
+ auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front();
// Resulting size needs to be made half open interval again.
auto size = closedIntSize + std_constant_index(1);
- // The size of the subview should be trimmed to avoid out-of-bounds
- // accesses, unless we statically know the subview size divides the view
- // size evenly.
- int64_t viewSize = viewType.getDimSize(r);
+ // The size of the subview / subtensor should be trimmed to avoid
+ // out-of-bounds accesses, unless we statically know the subshape size
+ // divides the shape size evenly.
+ int64_t shapeSize = shapedType.getDimSize(r);
auto sizeCst = size.getDefiningOp<ConstantIndexOp>();
- if (ShapedType::isDynamic(viewSize) || !sizeCst ||
- (viewSize % sizeCst.getValue()) != 0) {
+ if (ShapedType::isDynamic(shapeSize) || !sizeCst ||
+ (shapeSize % sizeCst.getValue()) != 0) {
// Compute min(size, dim - offset) to avoid out-of-bounds accesses.
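        // E.g. a dimension of size 10 tiled by 4 yields tile offsets 0, 4
        // and 8; the trailing tile is clamped to min(4, 10 - 8) = 2.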
        auto minMap = AffineMap::get(
            /*dimCount=*/3, /*symbolCount=*/0,
            {getAffineDimExpr(/*position=*/0, b.getContext()),
             getAffineDimExpr(/*position=*/1, b.getContext()) -
                 getAffineDimExpr(/*position=*/2, b.getContext())},
            b.getContext());
- auto d = std_dim(view, r);
+ auto d = std_dim(shapedOp, r);
size =
affine_min(b.getIndexType(), minMap, ValueRange{size, d, offset});
}
strides.push_back(std_constant_index(1));
}
- res.push_back(b.create<SubViewOp>(loc, view, offsets, sizes, strides));
+ if (shapedType.isa<MemRefType>())
+ res.push_back(
+ b.create<SubViewOp>(loc, shapedOp, offsets, sizes, strides));
+ else
+ res.push_back(
+ b.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
}
  return res;
}

template <typename LoopTy>
static Optional<TiledLinalgOp>
-tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef<Value> tileSizes,
+tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes,
const LinalgTilingOptions &options) {
auto nLoops = op.getNumLoops();
  // Initial tile sizes may be too big, only take the first nLoops.
  tileSizes = tileSizes.take_front(nLoops);

  if (llvm::all_of(tileSizes, isZero))
    return llvm::None;
// 1. Build the tiled loop ranges.
- auto allViewSizes = getViewSizes(b, op);
+ auto allShapeSizes = getShape(b, op);
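+ // `allShapeSizes` holds one size Value per dimension of each shaped
+ // operand, in operand order.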
// The flattened loopToOperandRangesMaps is expected to be an invertible
// permutation map (asserted in the inverse calculation).
auto mapsRange = op.indexing_maps().getAsRange<AffineMapAttr>();
auto maps = llvm::to_vector<8>(
llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
- auto viewSizesToLoopsMap = inversePermutation(concatAffineMaps(maps));
- if (!viewSizesToLoopsMap)
+ auto shapeSizesToLoopsMap = inversePermutation(concatAffineMaps(maps));
+ if (!shapeSizesToLoopsMap)
return llvm::None;
SmallVector<Range, 4> loopRanges;
LoopIndexToRangeIndexMap loopIndexToRangeIndex;
std::tie(loopRanges, loopIndexToRangeIndex) = makeTiledLoopRanges(
- b, op.getLoc(), viewSizesToLoopsMap, allViewSizes, tileSizes);
+ b, op.getLoc(), shapeSizesToLoopsMap, allShapeSizes, tileSizes);
SmallVector<Attribute, 4> iteratorTypes;
for (auto attr :
       enumerate(op.iterator_types().cast<ArrayAttr>().getValue())) {
    if (loopIndexToRangeIndex.count(attr.index()))
      iteratorTypes.push_back(attr.value());
  }
  // If interchangeVector is empty, use the identity. Otherwise build the
  // inverse permutation map and permute the loop ranges and iterator types.
  auto invPermutationMap =
      AffineMap::getMultiDimIdentityMap(tileSizes.size(), b.getContext());
  if (!options.interchangeVector.empty()) {
    invPermutationMap = inversePermutation(AffineMap::getPermutationMap(
        options.interchangeVector, b.getContext()));
    if (!invPermutationMap)
      return llvm::None;
    applyPermutationToVector(loopRanges, options.interchangeVector);
    applyPermutationToVector(iteratorTypes, options.interchangeVector);
  }

  // 2. Create the tiled loops.
LinalgOp res = op;
- SmallVector<Value, 4> ivs;
+ SmallVector<Value, 4> ivs, tensorResults;
+ auto initTensors = op.getInitTensors();
GenerateLoopNest<LoopTy>::doit(
- loopRanges, /*iterArgInitValues*/ {}, iteratorTypes,
+ loopRanges, /*iterArgInitValues*/ initTensors, iteratorTypes,
[&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector {
auto &b = ScopedContext::getBuilderRef();
auto loc = ScopedContext::getLocation();
ivs.assign(localIvs.begin(), localIvs.end());
- SmallVector<Value, 4> ivValues(ivs.begin(), ivs.end());
- // If we have to apply a permutation to the tiled loop nest, we have to
- // reorder the induction variables This permutation is the right one
- // assuming that loopRanges have previously been permuted by
- // (i,j,k)->(k,i,j) So this permutation should be the inversePermutation
- // of that one: (d0,d1,d2)->(d2,d0,d1)
+ // When an `interchangeVector` is present, it has been applied to the
+ // loop ranges and the iterator types. Apply its inverse to the
+ // resulting loop `ivs` to match the op definition.
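+ // E.g. if the loop ranges were permuted by (i,j,k)->(k,i,j), the ivs
+ // arrive in that permuted order and applying the inverse permutation
+ // (d0,d1,d2)->(d2,d0,d1) restores the (i,j,k) order the op expects.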
+ SmallVector<Value, 4> interchangedIvs;
if (!options.interchangeVector.empty())
- ivValues = applyMapToValues(b, loc, invPermutationMap, ivValues);
-
- auto views = makeTiledViews(b, loc, op, viewSizesToLoopsMap, ivValues,
- tileSizes, allViewSizes);
- auto operands = getAssumedNonViewOperands(op);
- views.append(operands.begin(), operands.end());
- res = op.clone(b, loc, /*resultTypes*/ {}, views);
- return scf::ValueVector{};
+ interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
+ else
+ interchangedIvs.assign(ivs.begin(), ivs.end());
+
+ assert(op.getNumInitTensors() == iterArgs.size() &&
+ "num init tensors must match number of loop iter arguments");
+ // This uses knowledge about position of the init tensor in the list
+ // of operands.
+ auto operands = llvm::to_vector<4>(op.getShapedOperands());
+ std::copy(iterArgs.begin(), iterArgs.end(),
+ operands.begin() + op.getNumInputsAndOutputBuffers());
+
+ SmallVector<Value, 4> tiledOperands =
+ makeTiledShapes(b, loc, op, operands, shapeSizesToLoopsMap,
+ interchangedIvs, tileSizes, allShapeSizes);
+ auto nonShapedOperands = op.getAssumedNonShapedOperands();
+ tiledOperands.append(nonShapedOperands.begin(),
+ nonShapedOperands.end());
+
+ // If the LinalgOp has results, they must all be tied to init tensors.
+ // We enforce this to ensure all tiled ops have been rewritten in
+ // "init tensor" form. This ensures tiling has anchor values into which
+ // to subtensor / subtensor_insert. Otherwise tiling would need to
+ // allocate, which is not acceptable.
+ // This would not be the case with a special terminator op that
+ // generates the whole tensor (instead of inserting a subtensor). But
+ // the generator-based abstraction has other issues.
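+ // Schematically, for each iteration and each tensor operand, the tiled
+ // loop body then reads:
+ //   %st  = subtensor %iter_arg[%offsets][%sizes][%strides]
+ //   %res = <tiled linalg op> ... init(%st)
+ //   %r   = subtensor_insert %res into %iter_arg[%offsets][%sizes][%strides]
+ //   scf.yield %r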
+ assert(op.getNumInitTensors() == op.getOperation()->getNumResults() &&
+ "expected same number of init tensors as number of results");
+
+ // Handle init tensor operands.
+ // This uses knowledge about position of the init tensor in the list
+ // of operands.
+ // TODO: InterfaceAdaptor ?
+ SmallVector<Type, 4> resultTensorTypes;
+ for (auto idx : llvm::seq<unsigned>(0, op.getNumInitTensors()))
+ resultTensorTypes.push_back(
+ tiledOperands[op.getNumInputsAndOutputBuffers() + idx].getType());
+
+ res = op.clone(b, loc, resultTensorTypes, tiledOperands);
+
+ // Insert a subtensor_insert for each init subtensor.
+ for (unsigned idx = 0, e = op.getNumInitTensors(); idx != e; ++idx) {
+ Value initTensor =
+ tiledOperands[op.getNumInputsAndOutputBuffers() + idx];
+ if (auto subtensor = initTensor.getDefiningOp<SubTensorOp>()) {
+ tensorResults.push_back(b.create<SubTensorInsertOp>(
+ loc, subtensor.source().getType(),
+ res.getOperation()->getResult(idx), subtensor.source(),
+ subtensor.offsets(), subtensor.sizes(), subtensor.strides(),
+ subtensor.static_offsets(), subtensor.static_sizes(),
+ subtensor.static_strides()));
+ } else {
+ tensorResults.push_back(res.getOperation()->getResult(idx));
+ }
+ }
+ return scf::ValueVector(tensorResults.begin(), tensorResults.end());
},
options.distribution);
  // 3. Transform IndexedGenericOp indices if necessary.
  transformIndexedGenericOpIndices(b, op, ivs, loopIndexToRangeIndex);

  // 4. Gather the newly created loops and return them with the new op.
  SmallVector<Operation *, 8> loops;
  loops.reserve(ivs.size());
  for (auto iv : ivs) {
    if (iv.isa<BlockArgument>()) {
      loops.push_back(iv.cast<BlockArgument>().getOwner()->getParentOp());
      assert(loops.back() && "no owner found for induction variable");
    } else {
      // TODO: Instead of doing this, try to recover the ops used instead of
      // the iv.
      loops.push_back(nullptr);
    }
  }
- return TiledLinalgOp{res, loops};
+
+ // 5. Get the tensor results from the outermost loop if available. Otherwise
+ // use the previously captured `tensorResults`.
+ Operation *outermostLoop = nullptr;
+ for (Operation *loop : loops)
+ if ((outermostLoop = loop))
+ break;
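+  // Inner loop results are yielded outward level by level, so the outermost
+  // loop carries the final tensor values.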
+
+ return TiledLinalgOp{
+ res, loops, outermostLoop ? outermostLoop->getResults() : tensorResults};
}
template <typename LoopTy>
static Optional<TiledLinalgOp>
tileLinalgOpImpl(OpBuilder &b, LinalgOp op,
                 const LinalgTilingOptions &options) {
  b.setInsertionPoint(op);
ScopedContext scope(b, op.getLoc());
- assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics");
// Enforce the convention that "tiling by zero" skips tiling a particular
// dimension. This convention is significantly simpler to handle instead of
// adjusting affine maps to account for missing dimensions.
scf::ForOp::getCanonicalizationPatterns(patterns, ctx);
scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx);
ConstantIndexOp::getCanonicalizationPatterns(patterns, ctx);
+ SubTensorOp::getCanonicalizationPatterns(patterns, ctx);
SubViewOp::getCanonicalizationPatterns(patterns, ctx);
+ TensorCastOp::getCanonicalizationPatterns(patterns, ctx);
ViewOp::getCanonicalizationPatterns(patterns, ctx);
CanonicalizationPatternList<
#define GET_OP_LIST