From 92df395068f1ed72632f8e601b10de89034398ac Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <ntv@google.com>
Date: Thu, 4 Apr 2019 19:57:42 -0700
Subject: [PATCH] Linalg portion of the tutorial - part 4

This CL adds declarative tiling support in the linalg dialect by providing:
1. loop tiling on linalg ops by simply calling into mlir::tile
2. view tiling on linalg ops by:
   a. computing the subview for each tile dimension based on the loop tile
      size and the mapping of loops to operand ranges.
   b. declaring that the tiled form of a tensor contraction is the same
      tensor contraction on subviews, which essentially gives us a
      recursive form.

Point 2.b is potentially subject to change in the future.

PiperOrigin-RevId: 242058658
---
 .../Linalg/Linalg2/include/linalg2/TensorOps.h     |  22 ++-
 .../Linalg/Linalg3/include/linalg3/TensorOps-inl.h |  33 +++-
 .../Linalg/Linalg3/include/linalg3/TensorOps.h     |   2 +-
 .../Linalg/Linalg3/include/linalg3/Transforms.h    |  33 ++++
 mlir/examples/Linalg/Linalg3/lib/TensorOps.cpp     |  23 ++-
 mlir/examples/Linalg/Linalg3/lib/Transforms.cpp    |  79 ++++----
 mlir/examples/Linalg/Linalg4/Example.cpp           | 176 +++++++++++++++++
 .../Linalg/Linalg4/include/linalg4/Transforms.h    |  47 +++++
 mlir/examples/Linalg/Linalg4/lib/Transforms.cpp    | 214 +++++++++++++++++++++
 9 files changed, 563 insertions(+), 66 deletions(-)
 create mode 100644 mlir/examples/Linalg/Linalg4/Example.cpp
 create mode 100644 mlir/examples/Linalg/Linalg4/include/linalg4/Transforms.h
 create mode 100644 mlir/examples/Linalg/Linalg4/lib/Transforms.cpp

diff --git a/mlir/examples/Linalg/Linalg2/include/linalg2/TensorOps.h b/mlir/examples/Linalg/Linalg2/include/linalg2/TensorOps.h
index 406bcaa..b8d9f8f 100644
--- a/mlir/examples/Linalg/Linalg2/include/linalg2/TensorOps.h
+++ b/mlir/examples/Linalg/Linalg2/include/linalg2/TensorOps.h
@@ -70,8 +70,13 @@ public:
   //////////////////////////////////////////////////////////////////////////////
   // Used in Linalg3 and later.
   //////////////////////////////////////////////////////////////////////////////
-  mlir::Value *getInputView(unsigned i);
-  mlir::Value *getOutputView(unsigned i);
+  mlir::Value *getInputView(unsigned viewIndex);
+  mlir::Value *getOutputView(unsigned viewIndex);
+  mlir::Value *getView(unsigned viewIndex) {
+    return viewIndex < getNumInputs()
+               ? getInputView(viewIndex)
+               : getOutputView(viewIndex - getNumInputs());
+  }
 
   /// Each op is responsible for declaring how it lowers itself to scalar form,
   /// given the enclosing parallel and reduction induction variables.
@@ -86,10 +91,9 @@ public:
   /// ConcreteOp implementation, the resulting map must match those.
   /// In favorable cases, this can be calculated by an analysis but specifying
   /// it explicitly is not expensive and generalizes to cases where an analysis
-  /// is not available.
-  /// For details, see the description of loopsToOperandRangesMap in each
-  /// ConcreteOp
-  mlir::AffineMap loopsToOperandRangesMap();
+  /// is not available. For details, see the description of
+  /// loopsToOperandRangeMaps in each ConcreteOp.
+  llvm::SmallVector<mlir::AffineMap, 4> loopsToOperandRangeMaps();
 };
 
 /// Implements c = A * B where c is a scalar and A and B are 1-D vectors.
@@ -135,7 +139,7 @@ public:
   ///   (d0) -> (d0, d0)(%k)
   /// And the operands ranges are:
   ///   (%k, %k)
-  mlir::AffineMap loopsToOperandRangesMap();
+  llvm::SmallVector<mlir::AffineMap, 4> loopsToOperandRangeMaps();
 
   /// Given an enclosing reduction loop with iv `r_i`, emits MLIR corresponding
   /// to:
@@ -195,7 +199,7 @@ public:
   ///   (d0, d1) -> (d0, d1, d1, d0)(%m, %k)
   /// And the operands ranges are:
   ///   (%m, %k, %k, %m)
-  mlir::AffineMap loopsToOperandRangesMap();
+  llvm::SmallVector<mlir::AffineMap, 4> loopsToOperandRangeMaps();
 
   /// Given an enclosing parallel loop with iv `i` and an enclosing reduction
   /// loop with iv `r_j`, emits MLIR corresponding to:
@@ -255,7 +259,7 @@ public:
   ///   (d0, d1, d2) -> (d0, d2, d2, d1, d0, d1)(%m, %n, %k)
   /// And the operands ranges are:
   ///   (%m, %k, %k, %n, %m, %n)
-  mlir::AffineMap loopsToOperandRangesMap();
+  llvm::SmallVector<mlir::AffineMap, 4> loopsToOperandRangeMaps();
 
   /// Given enclosing parallel loops with ivs `i` and `j`, and an enclosing
   /// reduction loop with iv `r_k`, emits MLIR corresponding to:
diff --git a/mlir/examples/Linalg/Linalg3/include/linalg3/TensorOps-inl.h b/mlir/examples/Linalg/Linalg3/include/linalg3/TensorOps-inl.h
index 60d99ab..b651053 100644
--- a/mlir/examples/Linalg/Linalg3/include/linalg3/TensorOps-inl.h
+++ b/mlir/examples/Linalg/Linalg3/include/linalg3/TensorOps-inl.h
@@ -29,20 +29,20 @@
 template <class ConcreteOp>
 mlir::Value *
-linalg::TensorContractionBase<ConcreteOp>::getInputView(unsigned i) {
-  return *(getInputs().begin() + i);
+linalg::TensorContractionBase<ConcreteOp>::getInputView(unsigned viewIndex) {
+  return *(getInputs().begin() + viewIndex);
 }
 
 template <class ConcreteOp>
 mlir::Value *
-linalg::TensorContractionBase<ConcreteOp>::getOutputView(unsigned i) {
-  return *(getOutputs().begin() + i);
+linalg::TensorContractionBase<ConcreteOp>::getOutputView(unsigned viewIndex) {
+  return *(getOutputs().begin() + viewIndex);
 }
 
 template <class ConcreteOp>
-mlir::AffineMap
-linalg::TensorContractionBase<ConcreteOp>::loopsToOperandRangesMap() {
-  return static_cast<ConcreteOp *>(this)->loopsToOperandRangesMap();
+llvm::SmallVector<mlir::AffineMap, 4>
+linalg::TensorContractionBase<ConcreteOp>::loopsToOperandRangeMaps() {
+  return static_cast<ConcreteOp *>(this)->loopsToOperandRangeMaps();
 }
 
 template <class ConcreteOp>
@@ -56,7 +56,24 @@ void linalg::TensorContractionBase<ConcreteOp>::emitScalarImplementation(
 template <class ConcreteOp>
 mlir::AffineMap linalg::operandRangesToLoopsMap(
     linalg::TensorContractionBase<ConcreteOp> &tensorContraction) {
-  return inverseSubMap(tensorContraction.loopsToOperandRangesMap());
+  mlir::AffineMap current;
+  // Individual submaps may not be invertible but their union must be
+  // invertible by construction.
+  for (auto m : tensorContraction.loopsToOperandRangeMaps()) {
+    if (!m)
+      continue;
+    if (!current) {
+      current = m;
+      continue;
+    }
+    llvm::SmallVector<mlir::AffineExpr, 8> results(
+        current.getResults().begin(), current.getResults().end());
+    results.append(m.getResults().begin(), m.getResults().end());
+    current = mlir::AffineMap::get(
+        std::max(current.getNumDims(), m.getNumDims()),
+        current.getNumSymbols() + m.getNumSymbols(), results, {});
+  }
+  return inverseSubMap(current);
 }
 
 // Extract the ranges from a given ViewOp or SliceOp.
diff --git a/mlir/examples/Linalg/Linalg3/include/linalg3/TensorOps.h b/mlir/examples/Linalg/Linalg3/include/linalg3/TensorOps.h
index 4ade192..bf5a377 100644
--- a/mlir/examples/Linalg/Linalg3/include/linalg3/TensorOps.h
+++ b/mlir/examples/Linalg/Linalg3/include/linalg3/TensorOps.h
@@ -23,7 +23,7 @@ namespace linalg {
 
 ///
-/// Ideally all these functions would go in an Analysis but until
+/// Ideally all these functions would go in an Analysis but as long as
 /// TensorContractionBase is templated, they need to remain close enough.
 ///
diff --git a/mlir/examples/Linalg/Linalg3/include/linalg3/Transforms.h b/mlir/examples/Linalg/Linalg3/include/linalg3/Transforms.h
index a66f7c4..f54c76b 100644
--- a/mlir/examples/Linalg/Linalg3/include/linalg3/Transforms.h
+++ b/mlir/examples/Linalg/Linalg3/include/linalg3/Transforms.h
@@ -19,14 +19,40 @@
 #define LINALG3_TRANSFORMS_H_
 
 #include "linalg2/Transforms.h"
+#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
 
 namespace mlir {
+class AffineForOp;
+class AffineMap;
 class Function;
 class FunctionPassBase;
+class Operation;
+class Value;
 } // namespace mlir
 
 namespace linalg {
 
+struct RangeParts {
+  explicit RangeParts(unsigned reserved);
+  RangeParts(llvm::ArrayRef<mlir::Value *> ranges);
+  llvm::SmallVector<mlir::Value *, 4> makeRanges();
+
+  llvm::SmallVector<mlir::Value *, 4> mins;
+  llvm::SmallVector<mlir::Value *, 4> maxes;
+  llvm::SmallVector<mlir::Value *, 4> steps;
+};
+
+mlir::Value *
+makeFoldedComposedAffineApply(mlir::AffineMap map,
+                              llvm::ArrayRef<mlir::Value *> operandsRef);
+
+llvm::SmallVector<mlir::Value *, 4>
+makeGenericLoopRanges(mlir::AffineMap operandRangesToLoopMaps,
+                      llvm::ArrayRef<mlir::Value *> ranges,
+                      llvm::ArrayRef<mlir::Value *> tileSizes = {});
+
 /// Traverses `f` and rewrites linalg.slice, and the operations it depends on,
 /// to only use linalg.view operations.
 void composeSliceOps(mlir::Function *f);
@@ -35,6 +61,13 @@ void composeSliceOps(mlir::Function *f);
 /// as linalg.matvec (resp. linalg.dot).
 void lowerToFinerGrainedTensorContraction(mlir::Function *f);
 
+/// Operation-wise rewriting of linalg operations to loop form.
+/// It is the caller's responsibility to erase the `op` if necessary.
+/// This returns the enclosing loops around the body of `op` for further
+/// composition of transformations.
+llvm::Optional<llvm::SmallVector<mlir::AffineForOp, 8>>
+writeAsLoops(mlir::Operation *op);
+
 /// Traverses `f` and rewrites linalg operations in loop form.
 void lowerToLoops(mlir::Function *f);
diff --git a/mlir/examples/Linalg/Linalg3/lib/TensorOps.cpp b/mlir/examples/Linalg/Linalg3/lib/TensorOps.cpp
index 61eaa06..c6f402f 100644
--- a/mlir/examples/Linalg/Linalg3/lib/TensorOps.cpp
+++ b/mlir/examples/Linalg/Linalg3/lib/TensorOps.cpp
@@ -39,14 +39,16 @@ using namespace linalg::intrinsics;
 //////////////////////////////////////////////////////////////////////////////
 // Implementation of DotOp.
 //////////////////////////////////////////////////////////////////////////////
-AffineMap linalg::DotOp::loopsToOperandRangesMap() {
+SmallVector<AffineMap, 4> linalg::DotOp::loopsToOperandRangeMaps() {
   // A(K), B(K), C()
   assert(getRanges(*this).size() == 2);
   auto *context = ScopedContext::getContext();
   auto d0 = getAffineDimExpr(0, context); // K
   // A(K), B(K), C()
   //   (d0) -> (d0, d0)(%k)
-  return AffineMap::get(1, 0, {d0, d0}, {});
+  return SmallVector<AffineMap, 4>{AffineMap::get(1, 0, {d0}, {}), // A(K)
+                                   AffineMap::get(1, 0, {d0}, {}), // B(K)
+                                   AffineMap()};                   // C()
 }
 
 void linalg::DotOp::emitScalarImplementation(
@@ -75,7 +77,7 @@ void linalg::DotOp::emitScalarImplementation(
 
 //////////////////////////////////////////////////////////////////////////////
 // Implementation of MatvecOp.
 //////////////////////////////////////////////////////////////////////////////
-AffineMap linalg::MatvecOp::loopsToOperandRangesMap() {
+SmallVector<AffineMap, 4> linalg::MatvecOp::loopsToOperandRangeMaps() {
   // A(M, K), B(K), C(M)
   assert(getRanges(*this).size() == 4);
   auto *context = ScopedContext::getContext();
@@ -83,7 +85,10 @@ AffineMap linalg::MatvecOp::loopsToOperandRangesMap() {
   auto d1 = getAffineDimExpr(1, context); // K
   // A(M, K), B(K), C(M)
   //   (d0, d1) -> (d0, d1, d1, d0)(%m, %k)
-  return AffineMap::get(2, 0, {d0, d1, d1, d0}, {});
+  return SmallVector<AffineMap, 4>{
+      AffineMap::get(2, 0, {d0, d1}, {}), // A(M, K)
+      AffineMap::get(2, 0, {d1}, {}),     // B(K)
+      AffineMap::get(2, 0, {d0}, {})};    // C(M)
 }
 
 // The body expression for matvec is: C(i) = scalarC + A(i, r_j) * B(r_j)
@@ -135,9 +140,9 @@ void linalg::MatvecOp::emitScalarImplementation(
 }
 
 //////////////////////////////////////////////////////////////////////////////
-// Op-specific Matmul.
+// Implementation of MatmulOp.
 //////////////////////////////////////////////////////////////////////////////
-AffineMap linalg::MatmulOp::loopsToOperandRangesMap() {
+SmallVector<AffineMap, 4> linalg::MatmulOp::loopsToOperandRangeMaps() {
   // A(M, K), B(K, N), C(M, N)
   assert(getRanges(*this).size() == 6);
   auto *context = ScopedContext::getContext();
@@ -146,7 +151,11 @@ AffineMap linalg::MatmulOp::loopsToOperandRangesMap() {
   auto d2 = getAffineDimExpr(2, context); // K
   // A(M, K), B(K, N), C(M, N):
   //   (d0, d1, d2) -> (d0, d2, d2, d1, d0, d1)(%m, %n, %k)
-  return AffineMap::get(3, 0, {d0, d2, d2, d1, d0, d1}, {});
+  return SmallVector<AffineMap, 4>{
+      AffineMap::get(3, 0, {d0, d2}, {}), // A(M, K)
+      AffineMap::get(3, 0, {d2, d1}, {}), // B(K, N)
+      AffineMap::get(3, 0, {d0, d1}, {})  // C(M, N)
+  };
 }
 
 // The body expression for matmul is: C(i, j) = scalarC + A(i, r_k) * B(r_k, j)
diff --git a/mlir/examples/Linalg/Linalg3/lib/Transforms.cpp b/mlir/examples/Linalg/Linalg3/lib/Transforms.cpp
index c224d8e..d9a56c6 100644
--- a/mlir/examples/Linalg/Linalg3/lib/Transforms.cpp
+++ b/mlir/examples/Linalg/Linalg3/lib/Transforms.cpp
@@ -71,8 +71,8 @@ static Value *tryFold(AffineMap map, SmallVector<Value *, 4> operands) {
   return nullptr;
 }
 
-static Value *makeFoldedComposedAffineApply(AffineMap map,
-                                            ArrayRef<Value *> operandsRef) {
+Value *linalg::makeFoldedComposedAffineApply(AffineMap map,
+                                             ArrayRef<Value *> operandsRef) {
   SmallVector<Value *, 4> operands(operandsRef.begin(), operandsRef.end());
   fullyComposeAffineMapAndOperands(&map, &operands);
   if (auto *v = tryFold(map, operands)) {
@@ -83,18 +83,7 @@ static Value *makeFoldedComposedAffineApply(AffineMap map,
   return b->create<AffineApplyOp>(loc, map, operands).getResult();
 }
 
-struct RangeParts {
-  explicit RangeParts(unsigned reserved);
-  RangeParts(ArrayRef<Value *> ranges);
-
-  SmallVector<Value *, 4> makeRanges();
-
-  SmallVector<Value *, 4> mins;
-  SmallVector<Value *, 4> maxes;
-  SmallVector<Value *, 4> steps;
-};
-
-RangeParts::RangeParts(unsigned reserved) {
+linalg::RangeParts::RangeParts(unsigned reserved) {
   mins.reserve(reserved);
   maxes.reserve(reserved);
   steps.reserve(reserved);
@@ -112,12 +101,12 @@ extractFromRanges(ArrayRef<Value *> ranges,
   return res;
 }
 
-RangeParts::RangeParts(ArrayRef<Value *> ranges)
+linalg::RangeParts::RangeParts(ArrayRef<Value *> ranges)
     : mins(extractFromRanges(ranges, [](RangeOp r) { return r.getMin(); })),
      maxes(extractFromRanges(ranges, [](RangeOp r) { return r.getMax(); })),
      steps(extractFromRanges(ranges, [](RangeOp r) { return r.getStep(); })) {}
 
-SmallVector<Value *, 4> RangeParts::makeRanges() {
+SmallVector<Value *, 4> linalg::RangeParts::makeRanges() {
   SmallVector<Value *, 4> res;
   res.reserve(mins.size());
   for (auto z : llvm::zip(mins, maxes, steps)) {
@@ -149,14 +138,15 @@ SmallVector<Value *, 4> makeGenericRanges(AffineMap map,
   return makeGenericRangeParts(map, ranges).makeRanges();
 }
 
-static SmallVector<Value *, 4> makeGenericLoopRanges(
-    AffineMap operandRangesToLoopsMap, ArrayRef<Value *> ranges,
-    llvm::Optional<ArrayRef<Value *>> tileSizes = llvm::None) {
-  RangeParts res = makeGenericRangeParts(operandRangesToLoopsMap, ranges);
-  if (!tileSizes.hasValue())
+SmallVector<Value *, 4>
+linalg::makeGenericLoopRanges(AffineMap operandRangesToLoopMaps,
+                              ArrayRef<Value *> ranges,
+                              ArrayRef<Value *> tileSizes) {
+  RangeParts res = makeGenericRangeParts(operandRangesToLoopMaps, ranges);
+  if (tileSizes.empty())
     return res.makeRanges();
   SmallVector<Value *, 4> tiledSteps;
-  for (auto z : llvm::zip(res.steps, *tileSizes)) {
+  for (auto z : llvm::zip(res.steps, tileSizes)) {
     auto *step = std::get<0>(z);
     auto tileSize = std::get<1>(z);
     auto stepValue = step->getDefiningOp()->cast<ConstantIndexOp>().getValue();
@@ -171,11 +161,12 @@ static SmallVector<Value *, 4> makeGenericLoopRanges(
 
 template <class ContractionOp>
 static SmallVector<mlir::AffineForOp, 8>
-writeAsLoops(ContractionOp contraction) {
-  ScopedContext scope(mlir::FuncBuilder(contraction.getOperation()),
+writeContractionAsLoops(ContractionOp contraction) {
+  ScopedContext scope(FuncBuilder(contraction.getOperation()),
                       contraction.getLoc());
-  auto loopRanges = makeGenericLoopRanges(operandRangesToLoopsMap(contraction),
-                                          getRanges(contraction));
+  auto allRanges = getRanges(contraction);
+  auto loopRanges =
+      makeGenericLoopRanges(operandRangesToLoopsMap(contraction), allRanges);
 
   SmallVector<IndexHandle, 8> parallelIvs(contraction.getNumParallelDims());
   SmallVector<IndexHandle, 8> reductionIvs(contraction.getNumReductionDims());
@@ -201,27 +192,33 @@ writeAsLoops(ContractionOp contraction) {
   });
   // clang-format on
 
-  SmallVector<mlir::AffineForOp, 8> res;
-  res.reserve(pivs.size() + rivs.size());
+  // Return the AffineForOps for better compositionality (e.g. tiling).
+  SmallVector<mlir::AffineForOp, 8> loops;
+  loops.reserve(pivs.size() + rivs.size());
   for (auto iv : parallelIvs)
-    res.push_back(getForInductionVarOwner(iv.getValue()));
+    loops.push_back(getForInductionVarOwner(iv.getValue()));
   for (auto iv : reductionIvs)
-    res.push_back(getForInductionVarOwner(iv.getValue()));
-  return res;
+    loops.push_back(getForInductionVarOwner(iv.getValue()));
+
+  return loops;
+}
+
+llvm::Optional<SmallVector<mlir::AffineForOp, 8>>
+linalg::writeAsLoops(Operation *op) {
+  if (auto matmulOp = op->dyn_cast<MatmulOp>()) {
+    return writeContractionAsLoops(matmulOp);
+  } else if (auto matvecOp = op->dyn_cast<MatvecOp>()) {
+    return writeContractionAsLoops(matvecOp);
+  } else if (auto dotOp = op->dyn_cast<DotOp>()) {
+    return writeContractionAsLoops(dotOp);
+  }
+  return llvm::None;
 }
 
 void linalg::lowerToLoops(mlir::Function *f) {
   f->walk([](Operation *op) {
-    if (auto matmulOp = op->dyn_cast<MatmulOp>()) {
-      writeAsLoops(matmulOp);
-    } else if (auto matvecOp = op->dyn_cast<MatvecOp>()) {
-      writeAsLoops(matvecOp);
-    } else if (auto dotOp = op->dyn_cast<DotOp>()) {
-      writeAsLoops(dotOp);
-    } else {
-      return;
-    }
-    op->erase();
+    if (writeAsLoops(op))
+      op->erase();
   });
 }
diff --git a/mlir/examples/Linalg/Linalg4/Example.cpp b/mlir/examples/Linalg/Linalg4/Example.cpp
new file mode 100644
index 0000000..91cc0f9
--- /dev/null
+++ b/mlir/examples/Linalg/Linalg4/Example.cpp
@@ -0,0 +1,176 @@
+//===- Example.cpp - Our running example ----------------------------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+// RUN: %p/test | FileCheck %s
+
+#include "TestHarness.h"
+#include "linalg1/Common.h"
+#include "linalg2/Intrinsics.h"
+#include "linalg3/Ops.h"
+#include "linalg4/Transforms.h"
+#include "mlir/IR/OpImplementation.h"
+
+using llvm::StringRef;
+
+using namespace mlir;
+using namespace mlir::edsc;
+using namespace mlir::edsc::intrinsics;
+using namespace linalg;
+using namespace linalg::common;
+using namespace linalg::intrinsics;
+
+Function *makeFunctionWithAMatmulOp(Module &module, StringRef name) {
+  MLIRContext *context = module.getContext();
+  auto dynamic2DMemRefType = floatMemRefType<2>(context);
+  mlir::Function *f = linalg::common::makeFunction(
+      module, name,
+      {dynamic2DMemRefType, dynamic2DMemRefType, dynamic2DMemRefType}, {});
+
+  ScopedContext scope(f);
+  // clang-format off
+  ValueHandle
+      M = dim(f->getArgument(0), 0),
+      N = dim(f->getArgument(2), 1),
+      K = dim(f->getArgument(0), 1),
+      rM = range(constant_index(0), M, constant_index(1)),
+      rN = range(constant_index(0), N, constant_index(1)),
+      rK = range(constant_index(0), K, constant_index(1)),
+      vA = view(f->getArgument(0), {rM, rK}),
+      vB = view(f->getArgument(1), {rK, rN}),
+      vC = view(f->getArgument(2), {rM, rN});
+  matmul(vA, vB, vC);
+  ret();
+  // clang-format on
+
+  return f;
+}
+
+TEST_FUNC(matmul_tiled_loops) {
+  MLIRContext context;
+  Module module(&context);
+  mlir::Function *f = makeFunctionWithAMatmulOp(module, "matmul_tiled_loops");
+  lowerToTiledLoops(f, {8, 9});
+  PassManager pm;
+  pm.addPass(createLowerLinalgLoadStorePass());
+  if (succeeded(pm.run(f->getModule())))
+    cleanupAndPrintFunction(f);
+
+  // clang-format off
+  // CHECK-LABEL: func @matmul_tiled_loops(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
+  // CHECK: %[[M:.*]] = dim %arg0, 0 : memref<?x?xf32>
+  // CHECK: %[[N:.*]] = dim %arg2, 1 : memref<?x?xf32>
+  // CHECK: %[[K:.*]] = dim %arg0, 1 : memref<?x?xf32>
+  // CHECK: affine.for %i0 = 0 to (d0) -> (d0)(%[[M]]) step 8 {
+  // CHECK: affine.for %i1 = 0 to (d0) -> (d0)(%[[N]]) step 9 {
+  // CHECK: affine.for %i2 = 0 to (d0) -> (d0)(%[[K]]) {
+  // CHECK: affine.for %i3 = max (d0)[s0] -> (s0, d0)(%i0)[%{{.*}}] to min (d0)[s0] -> (s0, d0 + 8)(%i0)[%[[M]]] {
+  // CHECK: affine.for %i4 = max (d0)[s0] -> (s0, d0)(%i1)[%{{.*}}] to min (d0)[s0] -> (s0, d0 + 9)(%i1)[%[[N]]] {
+  // CHECK-NEXT: %{{.*}} = cmpi "eq", %i2, %{{.*}} : index
+  // CHECK-NEXT: %[[I3:.*]] = affine.apply (d0) -> (d0)(%i3)
+  // CHECK-NEXT: %[[I4:.*]] = affine.apply (d0) -> (d0)(%i4)
+  // CHECK-NEXT: %{{.*}} = load %arg2[%[[I3]], %[[I4]]] : memref<?x?xf32>
+  // CHECK-NEXT: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32
+  // CHECK-NEXT: %[[I2:.*]] = affine.apply (d0) -> (d0)(%i2)
+  // CHECK-NEXT: %{{.*}} = load %arg1[%[[I2]], %[[I4]]] : memref<?x?xf32>
+  // CHECK-NEXT: %{{.*}} = load %arg0[%[[I3]], %[[I2]]] : memref<?x?xf32>
+  // CHECK-NEXT: %{{.*}} = mulf %10, %9 : f32
+  // CHECK-NEXT: %{{.*}} = addf %7, %11 : f32
+  // CHECK-NEXT: store %{{.*}}, %arg2[%[[I3]], %[[I4]]] : memref<?x?xf32>
+  // clang-format on
+}
+
+TEST_FUNC(matmul_tiled_views) {
+  MLIRContext context;
+  Module module(&context);
+  mlir::Function *f = makeFunctionWithAMatmulOp(module, "matmul_tiled_views");
+  FuncBuilder b(f);
+  lowerToTiledViews(f, {b.create<ConstantIndexOp>(f->getLoc(), 8),
+                        b.create<ConstantIndexOp>(f->getLoc(), 9)});
+  composeSliceOps(f);
+
+  // clang-format off
+  // CHECK-LABEL: func @matmul_tiled_views(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
+  // CHECK: %[[M:.*]] = dim %arg0, 0 : memref<?x?xf32>
+  // CHECK: %[[N:.*]] = dim %arg2, 1 : memref<?x?xf32>
+  // CHECK: %[[K:.*]] = dim %arg0, 1 : memref<?x?xf32>
+  // CHECK: affine.for %i0 = 0 to (d0) -> (d0)(%[[M]]) step 8 {
+  // CHECK-NEXT: affine.for %i1 = 0 to (d0) -> (d0)(%[[N]]) step 9 {
+  // CHECK-NEXT: %[[i0min:.*]] = affine.apply (d0) -> (d0)(%i0)
+  // CHECK-NEXT: %[[i0max:.*]] = affine.apply (d0) -> (d0 + 8)(%i0)
+  // CHECK-NEXT: %[[ri0:.*]] = linalg.range %[[i0min]]:%[[i0max]]:{{.*}} : !linalg<"range">
+  // CHECK: %[[rK:.*]] = linalg.range %{{.*}}:%{{.*}}:%{{.*}} : !linalg<"range">
+  // CHECK: %[[vA:.*]] = linalg.view %arg0[%[[ri0]], %[[rK]]] : !linalg<"view">
+  // CHECK: %[[i1min:.*]] = affine.apply (d0) -> (d0)(%i1)
+  // CHECK-NEXT: %[[i1max:.*]] = affine.apply (d0) -> (d0 + 9)(%i1)
+  // CHECK-NEXT: %[[ri1:.*]] = linalg.range %[[i1min]]:%[[i1max]]:%{{.*}} : !linalg<"range">
+  // CHECK-NEXT: %[[vB:.*]] = linalg.view %arg1[%10, %13] : !linalg<"view">
+  // CHECK-NEXT: %[[vC:.*]] = linalg.view %arg2[%5, %13] : !linalg<"view">
+  // CHECK-NEXT: linalg.matmul {%[[vA]], %[[vB]]} -> {%[[vC]]}
+  // clang-format on
+  cleanupAndPrintFunction(f);
+}
+
+TEST_FUNC(matmul_tiled_views_as_loops) {
+  MLIRContext context;
+  Module module(&context);
+  mlir::Function *f =
+      makeFunctionWithAMatmulOp(module, "matmul_tiled_views_as_loops");
+  FuncBuilder b(f);
+  lowerToTiledViews(f, {b.create<ConstantIndexOp>(f->getLoc(), 8),
+                        b.create<ConstantIndexOp>(f->getLoc(), 9)});
+  composeSliceOps(f);
+  lowerToLoops(f);
+  // This cannot lower below linalg.load and linalg.store due to lost
+  // information related to loop bounds and tiling. There are multiple ways to
+  // attack the problem; the best one is an IR change.
+
+  // clang-format off
+  // CHECK-LABEL: func @matmul_tiled_views_as_loops(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
+  // CHECK: %[[M:.*]] = dim %arg0, 0 : memref<?x?xf32>
+  // CHECK: %[[N:.*]] = dim %arg2, 1 : memref<?x?xf32>
+  // CHECK: %[[K:.*]] = dim %arg0, 1 : memref<?x?xf32>
+  // CHECK: affine.for %i0 = 0 to (d0) -> (d0)(%[[M]]) step 8 {
+  // CHECK-NEXT: affine.for %i1 = 0 to (d0) -> (d0)(%[[N]]) step 9 {
+  // CHECK-NEXT: %[[i0min:.*]] = affine.apply (d0) -> (d0)(%i0)
+  // CHECK-NEXT: %[[i0max:.*]] = affine.apply (d0) -> (d0 + 8)(%i0)
+  // CHECK-NEXT: %[[ri0:.*]] = linalg.range %[[i0min]]:%[[i0max]]:{{.*}} : !linalg<"range">
+  // CHECK: %[[rK:.*]] = linalg.range %{{.*}}:%{{.*}}:%{{.*}} : !linalg<"range">
+  // CHECK: %[[vA:.*]] = linalg.view %arg0[%[[ri0]], %[[rK]]] : !linalg<"view">
+  // CHECK: %[[i1min:.*]] = affine.apply (d0) -> (d0)(%i1)
+  // CHECK-NEXT: %[[i1max:.*]] = affine.apply (d0) -> (d0 + 9)(%i1)
+  // CHECK-NEXT: %[[ri1:.*]] = linalg.range %[[i1min]]:%[[i1max]]:%{{.*}} : !linalg<"range">
+  // CHECK-NEXT: %[[vB:.*]] = linalg.view %arg1[%10, %13] : !linalg<"view">
+  // CHECK-NEXT: %[[vC:.*]] = linalg.view %arg2[%5, %13] : !linalg<"view">
+  // CHECK-NEXT: affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0)(%[[i0max]]) {
+  // CHECK-NEXT: affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0)(%[[i1max]]) {
+  // CHECK-NEXT: affine.for %i4 = 0 to (d0) -> (d0)(%[[K]]) {
+  // CHECK-NEXT: %{{.*}} = cmpi "eq", %i4, %c0 : index
+  // CHECK-NEXT: %{{.*}} = linalg.load %[[vC]][%i2, %i3] : !linalg<"view">
+  // CHECK-NEXT: %{{.*}} = select %{{.*}}, %cst, %{{.*}} : f32
+  // CHECK-NEXT: %{{.*}} = linalg.load %[[vB]][%i4, %i3] : !linalg<"view">
+  // CHECK-NEXT: %{{.*}} = linalg.load %[[vA]][%i2, %i4] : !linalg<"view">
+  // CHECK-NEXT: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
+  // CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
+  // CHECK-NEXT: linalg.store %{{.*}}, %[[vC]][%i2, %i3] : !linalg<"view">
+  // clang-format on
+  cleanupAndPrintFunction(f);
+}
+
+int main() {
+  RUN_TESTS();
+  return 0;
+}
diff --git a/mlir/examples/Linalg/Linalg4/include/linalg4/Transforms.h b/mlir/examples/Linalg/Linalg4/include/linalg4/Transforms.h
new file mode 100644
index 0000000..2165cab
--- /dev/null
+++ b/mlir/examples/Linalg/Linalg4/include/linalg4/Transforms.h
@@ -0,0 +1,47 @@
+//===- Transforms.h - Linalg dialect Transformations definition -----------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#ifndef LINALG4_TRANSFORMS_H_
+#define LINALG4_TRANSFORMS_H_
+
+#include "linalg3/Transforms.h"
+#include "mlir/Support/LLVM.h"
+
+namespace linalg {
+
+/// Rewrites a linalg `op` in tiled loop form; it is the caller's
+/// responsibility to erase `op` if necessary.
+llvm::Optional<llvm::SmallVector<mlir::AffineForOp, 8>>
+writeAsTiledLoops(mlir::Operation *op, llvm::ArrayRef<uint64_t> tileSizes);
+
+/// Rewrites a linalg `op` in tiled view form; it is the caller's
+/// responsibility to erase `op` if necessary.
+llvm::Optional<llvm::SmallVector<mlir::AffineForOp, 8>>
+writeAsTiledViews(mlir::Operation *op, llvm::ArrayRef<mlir::Value *> tileSizes);
+
+/// Apply `writeAsTiledLoops` on all linalg ops. This is a convenience function
+/// and is not exposed as a pass because a fixed set of tile sizes for all the
+/// ops in a function cannot generally be specified.
+void lowerToTiledLoops(mlir::Function *f, llvm::ArrayRef<uint64_t> tileSizes);
+
+/// Apply `writeAsTiledViews` on all linalg ops. This is a convenience function
+/// and is not exposed as a pass because a fixed set of tile sizes for all the
+/// ops in a function cannot generally be specified.
+void lowerToTiledViews(mlir::Function *f,
+                       llvm::ArrayRef<mlir::Value *> tileSizes);
+
+} // namespace linalg
+
+#endif // LINALG4_TRANSFORMS_H_
diff --git a/mlir/examples/Linalg/Linalg4/lib/Transforms.cpp b/mlir/examples/Linalg/Linalg4/lib/Transforms.cpp
new file mode 100644
index 0000000..ece8598
--- /dev/null
+++ b/mlir/examples/Linalg/Linalg4/lib/Transforms.cpp
@@ -0,0 +1,214 @@
+//===- Transforms.cpp - Implementation of the linalg Transformations ------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements analyses and transformations for the linalg dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "linalg4/Transforms.h"
+#include "linalg3/Intrinsics.h"
+#include "linalg3/TensorOps.h"
+
+#include "mlir/AffineOps/AffineOps.h"
+#include "mlir/EDSC/Helpers.h"
+#include "mlir/IR/OpImplementation.h"
+#include "mlir/Transforms/LoopUtils.h"
+
+using llvm::ArrayRef;
+using llvm::SmallVector;
+using namespace mlir;
+using namespace mlir::edsc;
+using namespace linalg;
+using namespace linalg::intrinsics;
+
+llvm::Optional<SmallVector<mlir::AffineForOp, 8>>
+linalg::writeAsTiledLoops(Operation *op, ArrayRef<uint64_t> tileSizes) {
+  auto loops = writeAsLoops(op);
+  if (loops.hasValue())
+    return mlir::tile(*loops, tileSizes, loops->back());
+  return llvm::None;
+}
+
+void linalg::lowerToTiledLoops(mlir::Function *f,
+                               ArrayRef<uint64_t> tileSizes) {
+  f->walk([tileSizes](Operation *op) {
+    if (writeAsTiledLoops(op, tileSizes).hasValue())
+      op->erase();
+  });
+}
+
+template <class ConcreteOp>
+static Operation::operand_range
+getInputsAndOutputs(TensorContractionBase<ConcreteOp> &contraction) {
+  auto *inst = static_cast<ConcreteOp *>(&contraction)->getOperation();
+  auto begin = inst->operand_begin();
+  auto end = inst->operand_begin() + contraction.getNumInputs() +
+             contraction.getNumOutputs();
+  return {begin, end};
+}
+
+static bool isZeroIndex(Value *v) {
+  return v->getDefiningOp() && v->getDefiningOp()->isa<ConstantIndexOp>() &&
+         v->getDefiningOp()->dyn_cast<ConstantIndexOp>().getValue() == 0;
+}
+
+template <class ConcreteOp>
+static llvm::SmallVector<Value *, 4>
+makeTiledRanges(TensorContractionBase<ConcreteOp> &contraction,
+                ArrayRef<Value *> allRanges, llvm::ArrayRef<Value *> ivs,
+                llvm::ArrayRef<Value *> tileSizes) {
+  assert(ivs.size() == tileSizes.size());
+  if (ivs.empty())
+    return RangeParts(allRanges).makeRanges();
+
+  auto *op = static_cast<ConcreteOp *>(&contraction);
+  RangeParts result(allRanges.size());
+  RangeParts rangeParts(allRanges);
+
+  for (auto map : op->loopsToOperandRangeMaps()) {
+    // 1. Take the first ivs.size() results of the map; the other ones are not
+    //    composed but merely copied over.
+    assert(map.getNumSymbols() == 0);
+    assert(map.getRangeSizes().empty());
+    MLIRContext *context = ScopedContext::getContext();
+    unsigned numParallel = op->getNumParallelDims();
+    unsigned numReduction = op->getNumReductionDims();
+    if (ivs.size() < numParallel + numReduction) {
+      // Inject zeros in positions that are not tiled.
+      SmallVector<AffineExpr, 4> dimReplacements(numParallel + numReduction);
+      for (unsigned i = 0, e = numParallel + numReduction; i < e; ++i) {
+        dimReplacements[i] = (i < ivs.size())
+                                 ? getAffineDimExpr(i, context)
+                                 : getAffineConstantExpr(0, context);
+      }
+      map = map.replaceDimsAndSymbols(dimReplacements, {}, ivs.size(), 0);
+    }
+
+    // 2. Apply the rewritten map to the ranges.
+    unsigned numDims = map.getNumDims();
+    for (auto en : llvm::enumerate(map.getResults())) {
+      auto index = en.index();
+      auto expr = en.value();
+      AffineMap exprMap = AffineMap::get(numDims, 0, expr, {});
+      ValueHandle offset(makeFoldedComposedAffineApply(exprMap, ivs));
+      // Offset is normally a function of loop induction variables.
+      // If it is 0, it must come from a dimension that was not tiled.
+      if (isZeroIndex(offset)) {
+        result.mins.push_back(rangeParts.mins[index]);
+        result.maxes.push_back(rangeParts.maxes[index]);
+        continue;
+      }
+
+      ValueHandle step(makeFoldedComposedAffineApply(exprMap, tileSizes));
+      ValueHandle min(rangeParts.mins[index]);
+      using edsc::op::operator+;
+      result.mins.push_back(min + offset);
+      // Ideally this should be:
+      //   `min(rangeParts.max, rangeParts.min + offset + step)`
+      // but that runs into the current limitations of the affine dialect.
+      result.maxes.push_back(min + offset + step);
+    }
+  }
+  // Note that for the purpose of tiled ranges and views, the steps do not
+  // change in our representation.
+  result.steps = rangeParts.steps;
+
+  return result.makeRanges();
+}
+
+template <class ConcreteOp>
+static SmallVector<Value *, 4>
+makeTiledViews(linalg::TensorContractionBase<ConcreteOp> &contraction,
+               ArrayRef<Value *> ivs, ArrayRef<Value *> tileSizes) {
+  auto tiledRanges =
+      makeTiledRanges(contraction, getRanges(contraction), ivs, tileSizes);
+  SmallVector<Value *, 4> res;
+  unsigned currentRange = 0;
+  for (auto *in : getInputsAndOutputs(contraction)) {
+    unsigned runningSliceDim = 0;
+    auto *runningSlice = in;
+    assert(runningSlice->getType().template isa<linalg::ViewType>());
+    for (unsigned d = 0, e = getViewRank(runningSlice); d < e; ++d) {
+      auto *r = tiledRanges[currentRange++];
+      runningSlice = slice(runningSlice, r, runningSliceDim++).getValue();
+    }
+    res.push_back(runningSlice);
+  }
+  return res;
+}
+
+template <class ConcreteOp>
+static SmallVector<mlir::AffineForOp, 8>
+writeContractionAsTiledViews(TensorContractionBase<ConcreteOp> &contraction,
+                             ArrayRef<Value *> tileSizes) {
+  assert(tileSizes.size() <=
+         contraction.getNumParallelDims() + contraction.getNumReductionDims());
+
+  auto *op = static_cast<ConcreteOp *>(&contraction);
+  ScopedContext scope(mlir::FuncBuilder(op->getOperation()), op->getLoc());
+  SmallVector<IndexHandle, 8> ivs(tileSizes.size());
+  auto pivs = IndexHandle::makeIndexHandlePointers(ivs);
+
+  // clang-format off
+  using linalg::common::LoopNestRangeBuilder;
+  auto ranges = makeGenericLoopRanges(operandRangesToLoopsMap(contraction),
+                                      getRanges(contraction), tileSizes);
+  linalg::common::LoopNestRangeBuilder(pivs, ranges)({
+    [&contraction, &tileSizes, &ivs]() {
+      SmallVector<Value *, 8> ivValues(ivs.begin(), ivs.end());
+      auto views = makeTiledViews(contraction, ivValues, tileSizes);
+      ScopedContext::getBuilder()->create<ConcreteOp>(
+          ScopedContext::getLocation(), views);
+      /// NestedBuilders expect handles; we thus return an IndexHandle.
+      return IndexHandle();
+    }()
+  });
+  // clang-format on
+
+  SmallVector<mlir::AffineForOp, 8> res;
+  res.reserve(ivs.size());
+  for (auto iv : ivs)
+    res.push_back(getForInductionVarOwner(iv.getValue()));
+  return res;
+}
+
+llvm::Optional<SmallVector<mlir::AffineForOp, 8>>
+linalg::writeAsTiledViews(Operation *op, ArrayRef<Value *> tileSizes) {
+  if (auto matmulOp = op->dyn_cast<MatmulOp>()) {
+    return writeContractionAsTiledViews(matmulOp, tileSizes);
+  } else if (auto matvecOp = op->dyn_cast<MatvecOp>()) {
+    return writeContractionAsTiledViews(matvecOp, tileSizes);
+  } else if (auto dotOp = op->dyn_cast<DotOp>()) {
+    return writeContractionAsTiledViews(dotOp, tileSizes);
+  }
+  return llvm::None;
+}
+
+void linalg::lowerToTiledViews(mlir::Function *f,
+                               ArrayRef<Value *> tileSizes) {
+  f->walk([tileSizes](Operation *op) {
+    if (auto matmulOp = op->dyn_cast<MatmulOp>()) {
+      writeAsTiledViews(matmulOp, tileSizes);
+    } else if (auto matvecOp = op->dyn_cast<MatvecOp>()) {
+      writeAsTiledViews(matvecOp, tileSizes);
+    } else if (auto dotOp = op->dyn_cast<DotOp>()) {
+      writeAsTiledViews(dotOp, tileSizes);
+    } else {
+      return;
+    }
+    op->erase();
+  });
+}
-- 
2.7.4
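
To make the new loopsToOperandRangeMaps contract concrete, here is a worked
example for matmul, derived from the maps defined in TensorOps.cpp above (the
inversion shown is, in effect, what inverseSubMap computes on their union).
With loops (d0, d1, d2) = (%m, %n, %k), the per-operand maps are:

  A(M, K): (d0, d1, d2) -> (d0, d2)
  B(K, N): (d0, d1, d2) -> (d2, d1)
  C(M, N): (d0, d1, d2) -> (d0, d1)

operandRangesToLoopsMap concatenates them into the single map
(d0, d1, d2) -> (d0, d2, d2, d1, d0, d1), i.e. the old loopsToOperandRangesMap,
and inverts it, recovering each loop from the first operand range in which it
appears:

  (d0, d1, d2, d3, d4, d5) -> (d0, d3, d1)
  // %m from A's first range, %n from B's second range, %k from A's second
  // range.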
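
As a quick usage sketch of the two new entry points (mirroring the TEST_FUNCs
in Example.cpp; makeFunctionWithAMatmulOp is the helper defined there, the tile
sizes {8, 9} are taken from the tests, and the wrapper function name is
illustrative):

  #include "linalg4/Transforms.h"

  void tileMatmulBothWays(mlir::Module &module) {
    // Loop tiling: lower the matmul to loops, then tile them 8x9 by calling
    // into mlir::tile (point 1 of the commit message). Tile sizes are
    // compile-time constants here.
    mlir::Function *f = makeFunctionWithAMatmulOp(module, "tiled_loops");
    linalg::lowerToTiledLoops(f, /*tileSizes=*/{8, 9});

    // View tiling: rewrite the matmul as the same contraction on subviews,
    // under loops over the tile ranges (points 2.a and 2.b). Tile sizes are
    // SSA values here.
    mlir::Function *g = makeFunctionWithAMatmulOp(module, "tiled_views");
    mlir::FuncBuilder b(g);
    linalg::lowerToTiledViews(
        g, {b.create<mlir::ConstantIndexOp>(g->getLoc(), 8),
            b.create<mlir::ConstantIndexOp>(g->getLoc(), 9)});
    linalg::composeSliceOps(g);
  }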