From 9b58691e749091543fbaa70b2fcd8f62e025e214 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache
Date: Mon, 20 May 2019 08:04:40 -0700
Subject: [PATCH] Add lowering of LinalgLibraryOps to linalg.load + linalg.store.

This CL adds a pass to lower dot, matvec, matmul, etc. into a combination of
affine.for, linalg.load and linalg.store operations. Such operations can then
later be lowered to LLVM.

This CL essentially performs op expansion using EDSCs and factors out a few
common utils from Tiling.cpp.

--
PiperOrigin-RevId: 249049518
---
 mlir/include/mlir/Linalg/IR/LinalgOps.h     |   4 +
 mlir/include/mlir/Linalg/IR/LinalgOps.td    |   4 +-
 mlir/include/mlir/Linalg/Passes.h           |   2 +
 mlir/include/mlir/Linalg/Utils/Utils.h      |  19 +++++
 mlir/lib/Linalg/IR/LinalgOps.cpp            |  51 +++++++++++-
 mlir/lib/Linalg/Transforms/LowerToLoops.cpp | 118 ++++++++++++++++++++++++++++
 mlir/lib/Linalg/Transforms/Tiling.cpp       |  86 +++-----------------
 mlir/lib/Linalg/Utils/Utils.cpp             |  60 ++++++++++++++
 mlir/test/Linalg/loops.mlir                 |  75 ++++++++++++++++++
 9 files changed, 339 insertions(+), 80 deletions(-)
 create mode 100644 mlir/lib/Linalg/Transforms/LowerToLoops.cpp
 create mode 100644 mlir/test/Linalg/loops.mlir

diff --git a/mlir/include/mlir/Linalg/IR/LinalgOps.h b/mlir/include/mlir/Linalg/IR/LinalgOps.h
index 0c48cb0..7a106cf 100644
--- a/mlir/include/mlir/Linalg/IR/LinalgOps.h
+++ b/mlir/include/mlir/Linalg/IR/LinalgOps.h
@@ -406,6 +406,10 @@ private:
   };
 };
 
+void emitScalarImplementation(llvm::ArrayRef<Value *> parallelIvs,
+                              llvm::ArrayRef<Value *> reductionIvs,
+                              LinalgOp &linalgOp);
+
 } // namespace linalg
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Linalg/IR/LinalgOps.td
index c595b4c..824db9a 100644
--- a/mlir/include/mlir/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Linalg/IR/LinalgOps.td
@@ -47,7 +47,7 @@ def BufferSizeOp :
   let description = [{
     The "linalg.buffer_size" operation takes a linalg.buffer and returns an
     "index". For example:
-    
+
        %0 = linalg.buffer_size %arg0 : !linalg.buffer
   }];
   // Fully specified by traits.
@@ -112,4 +112,4 @@ def RangeIntersectOp : Linalg_Op<"range_intersect", [NoSideEffect]>,
   }]>];
 }
 
-#endif // LINALG_OPS
\ No newline at end of file
+#endif // LINALG_OPS
diff --git a/mlir/include/mlir/Linalg/Passes.h b/mlir/include/mlir/Linalg/Passes.h
index 0e3a86b..88f2346 100644
--- a/mlir/include/mlir/Linalg/Passes.h
+++ b/mlir/include/mlir/Linalg/Passes.h
@@ -32,6 +32,8 @@ class ModulePassBase;
 namespace linalg {
 FunctionPassBase *createLinalgTilingPass(ArrayRef<int64_t> tileSizes = {});
 
+FunctionPassBase *createLowerLinalgToLoopsPass();
+
 ModulePassBase *createLowerLinalgToLLVMPass();
 } // namespace linalg
 } // namespace mlir
diff --git a/mlir/include/mlir/Linalg/Utils/Utils.h b/mlir/include/mlir/Linalg/Utils/Utils.h
index 63bb2b3..e0299c9 100644
--- a/mlir/include/mlir/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Linalg/Utils/Utils.h
@@ -42,6 +42,18 @@ private:
 
 } // namespace edsc
 
+/// Helper class to memoize the creation of redundant constants within a given
+/// function.
+class FunctionConstants {
+public:
+  FunctionConstants(Function &f) : f(f) {}
+  Value *getOrCreateIndex(int64_t v);
+
+private:
+  Function &f;
+  llvm::SmallDenseMap<int64_t, Value *> map;
+};
+
 /// Abstracts away the extraction of values of RangeType from the actual op
 /// implementation. For each operand of `op`:
 ///   1. If it is of RangeType, appends it to the result.
@@ -77,6 +89,13 @@ Value *createOrReturnView(FuncBuilder *b, Location loc,
 enum class RangePart { Min = 0, Max, Step };
 Value *extractRangePart(Value *range, RangePart part);
 
+/// Returns the values obtained by applying `map` to the list of range parts
+/// extracted from `ranges`.
+SmallVector<Value *, 4> applyMapToRangePart(FuncBuilder *b, Location loc,
+                                            AffineMap map,
+                                            ArrayRef<Value *> ranges,
+                                            RangePart part,
+                                            FunctionConstants &state);
 } // namespace mlir
 
 #endif // MLIR_LINALG_UTILS_H_
diff --git a/mlir/lib/Linalg/IR/LinalgOps.cpp b/mlir/lib/Linalg/IR/LinalgOps.cpp
index 6a8f289..27e1385 100644
--- a/mlir/lib/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Linalg/IR/LinalgOps.cpp
@@ -20,6 +20,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Linalg/IR/LinalgOps.h"
+#include "mlir/EDSC/Helpers.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
@@ -30,6 +31,8 @@
 #include "mlir/Support/STLExtras.h"
 
 using namespace mlir;
+using namespace mlir::edsc;
+using namespace mlir::edsc::intrinsics;
 using namespace mlir::linalg;
 
 //////////////////////////////////////////////////////////////////////////////
@@ -594,6 +597,7 @@ static ParseResult parseLinalgLibraryOp(OpAsmParser *parser,
 }
 
 namespace mlir {
+namespace linalg {
 
 #define GET_OP_CLASSES
 #include "mlir/Linalg/IR/LinalgOps.cpp.inc"
@@ -601,6 +605,7 @@ namespace mlir {
 #define GET_OP_CLASSES
 #include "mlir/Linalg/IR/LinalgLibraryOps.cpp.inc"
 
+} // namespace linalg
 } // namespace mlir
 
 // Ideally this should all be Tablegen'd but there is no good story for
@@ -621,9 +626,53 @@ SmallVector<AffineMap, 4> mlir::linalg::loopToOperandRangesMaps(Operation *op) {
                                      AffineMap::get(2, 0, {j}, {}),
                                      AffineMap::get(2, 0, {i}, {})};
   if (isa<MatmulOp>(op))
-    // A(i, r_j) * B(r_j) -> C(i)
+    // A(i, r_k) * B(r_k, j) -> C(i, j)
     return SmallVector<AffineMap, 4>{AffineMap::get(3, 0, {i, k}, {}),
                                      AffineMap::get(3, 0, {k, j}, {}),
                                      AffineMap::get(3, 0, {i, j}, {})};
   llvm_unreachable("Missing loopToOperandRangesMaps for op");
 }
+
+// Ideally this should all be Tablegen'd but there is no good story for op
+// expansion directly in MLIR for now.
+void mlir::linalg::emitScalarImplementation(
+    llvm::ArrayRef<Value *> parallelIvs, llvm::ArrayRef<Value *> reductionIvs,
+    LinalgOp &linalgOp) {
+  using linalg_load = ValueBuilder<linalg::LoadOp>;
+  using linalg_store = OperationBuilder<linalg::StoreOp>;
+  using IndexedValue = TemplatedIndexedValue<linalg_load, linalg_store>;
+  assert(reductionIvs.size() == 1);
+  auto innermostLoop = getForInductionVarOwner(reductionIvs.back());
+  auto *body = innermostLoop.getBody();
+  using edsc::op::operator+;
+  using edsc::op::operator*;
+  using edsc::op::operator==;
+  using edsc::intrinsics::select;
+
+  // account for affine.terminator in loop.
+  FuncBuilder b(body, std::prev(body->end(), 1));
+  ScopedContext scope(b, innermostLoop.getLoc());
+  auto *op = linalgOp.getOperation();
+  if (isa<DotOp>(op)) {
+    IndexHandle r_i(reductionIvs[0]);
+    IndexedValue A(op->getOperand(0)), B(op->getOperand(1)),
+        C(op->getOperand(2));
+    C() = C() + A(r_i) * B(r_i);
+    return;
+  }
+  if (isa<MatvecOp>(op)) {
+    IndexHandle i(parallelIvs[0]), r_j(reductionIvs[0]);
+    IndexedValue A(op->getOperand(0)), B(op->getOperand(1)),
+        C(op->getOperand(2));
+    C(i) = C(i) + A(i, r_j) * B(r_j);
+    return;
+  }
+  if (isa<MatmulOp>(op)) {
+    IndexHandle i(parallelIvs[0]), j(parallelIvs[1]), r_k(reductionIvs[0]);
+    IndexedValue A(op->getOperand(0)), B(op->getOperand(1)),
+        C(op->getOperand(2));
+    C(i, j) = C(i, j) + A(i, r_k) * B(r_k, j);
+    return;
+  }
+  llvm_unreachable("Missing loopToOperandRangesMaps for op");
+}
diff --git a/mlir/lib/Linalg/Transforms/LowerToLoops.cpp b/mlir/lib/Linalg/Transforms/LowerToLoops.cpp
new file mode 100644
index 0000000..b89b7cd
--- /dev/null
+++ b/mlir/lib/Linalg/Transforms/LowerToLoops.cpp
@@ -0,0 +1,118 @@
+//===- LowerToLoops.cpp - conversion from Linalg library ops to loops------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#include "mlir/EDSC/Helpers.h"
+#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/AffineMap.h"
+#include "mlir/IR/OpImplementation.h"
+#include "mlir/Linalg/IR/LinalgOps.h"
+#include "mlir/Linalg/IR/LinalgTypes.h"
+#include "mlir/Linalg/Passes.h"
+#include "mlir/Linalg/Utils/Utils.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/STLExtras.h"
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace mlir;
+using namespace mlir::edsc;
+using namespace mlir::edsc::intrinsics;
+using namespace mlir::linalg;
+using namespace llvm;
+
+// Creates a number of ranges equal to the number of results in `map`.
+// The returned ranges correspond to the loop ranges, in the proper order, for
+// which new loops will be created.
+static SmallVector<Value *, 4> makeLoopRanges(FuncBuilder *b, Location loc,
+                                              AffineMap map,
+                                              ArrayRef<Value *> allOpRanges,
+                                              FunctionConstants &state) {
+  // Apply `map` to get mins/maxes/steps in loop order.
+  auto mins =
+      applyMapToRangePart(b, loc, map, allOpRanges, RangePart::Min, state);
+  auto maxes =
+      applyMapToRangePart(b, loc, map, allOpRanges, RangePart::Max, state);
+  auto steps =
+      applyMapToRangePart(b, loc, map, allOpRanges, RangePart::Step, state);
+
+  // Create a new range with the applied tile sizes.
+  SmallVector<Value *, 4> res;
+  for (unsigned idx = 0, e = steps.size(); idx < e; ++idx)
+    res.push_back(b->create<RangeOp>(loc, mins[idx], maxes[idx], steps[idx]));
+  return res;
+}
+
+static void emitLinalgOpAsLoops(LinalgOp &linalgOp, FunctionConstants &state) {
+  FuncBuilder b(linalgOp.getOperation());
+  ScopedContext scope(b, linalgOp.getOperation()->getLoc());
+  auto loopRanges = makeLoopRanges(
+      scope.getBuilder(), scope.getLocation(),
+      // The flattened loopToOperandRangesMaps is expected to be an invertible
+      // permutation map (which is asserted in the inverse calculation).
+      inversePermutation(concatAffineMaps(loopToOperandRangesMaps(linalgOp))),
+      getRanges(linalgOp.getOperation()), state);
+
+  SmallVector<IndexHandle, 4> parallelIvs(linalgOp.getNumParallelLoops());
+  SmallVector<IndexHandle, 4> reductionIvs(linalgOp.getNumReductionLoops());
+  auto pivs = IndexHandle::makeIndexHandlePointers(parallelIvs);
+  auto rivs = IndexHandle::makeIndexHandlePointers(reductionIvs);
+  assert(loopRanges.size() == pivs.size() + rivs.size());
+
+  // clang-format off
+  ArrayRef<Value *> ranges(loopRanges);
+  LoopNestRangeBuilder(pivs, ranges.take_front(pivs.size()))({
+    LoopNestRangeBuilder(rivs, ranges.take_back(rivs.size()))({
+      [&linalgOp, &parallelIvs, &reductionIvs]() {
+        SmallVector<Value *, 4> parallel(
+            parallelIvs.begin(), parallelIvs.end());
+        SmallVector<Value *, 4> reduction(
+            reductionIvs.begin(), reductionIvs.end());
+        emitScalarImplementation(parallel, reduction, linalgOp);
+        /// NestedBuilders expect handles, we thus return an IndexHandle.
+        return IndexHandle();
+      }()
+    })
+  });
+  // clang-format on
+}
+
+namespace {
+struct LowerLinalgToLoopsPass : public FunctionPass<LowerLinalgToLoopsPass> {
+  void runOnFunction();
+};
+} // namespace
+
+void LowerLinalgToLoopsPass::runOnFunction() {
+  auto &f = getFunction();
+  FunctionConstants state(f);
+  f.walk([&state](Operation *op) {
+    if (auto linalgOp = dyn_cast<LinalgOp>(op)) {
+      emitLinalgOpAsLoops(linalgOp, state);
+      op->erase();
+    }
+  });
+}
+
+FunctionPassBase *mlir::linalg::createLowerLinalgToLoopsPass() {
+  return new LowerLinalgToLoopsPass();
+}
+
+static PassRegistration<LowerLinalgToLoopsPass>
+    pass("linalg-lower-to-loops",
+         "Lower the operations from the linalg dialect into loops");
diff --git a/mlir/lib/Linalg/Transforms/Tiling.cpp b/mlir/lib/Linalg/Transforms/Tiling.cpp
index b2cff60..34d567d 100644
--- a/mlir/lib/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Linalg/Transforms/Tiling.cpp
@@ -47,74 +47,6 @@ static llvm::cl::list<unsigned>
                    llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated,
                    llvm::cl::cat(clOptionsCategory));
 
-namespace {
-class PerFunctionState {
-public:
-  PerFunctionState(Function &f) : f(f) {}
-
-  Value *getOrCreate(int64_t v) {
-    auto it = map.find(v);
-    if (it != map.end())
-      return it->second;
-    FuncBuilder builder(f);
-    edsc::ScopedContext s(builder, f.getLoc());
-    return map.insert(std::make_pair(v, edsc::intrinsics::constant_index(v)))
-        .first->getSecond();
-  }
-
-private:
-  Function &f;
-  SmallDenseMap<int64_t, Value *> map;
-};
-} // namespace
-
-// Folding eagerly is necessary to abide by affine.for static step requirement.
-// We must propagate constants on the steps as aggressively as possible.
-// Returns nullptr if folding is not trivially feasible.
-static Value *tryFold(AffineMap map, ArrayRef<Value *> operands,
-                      PerFunctionState &state) {
-  assert(map.getNumResults() == 1 && "single result map expected");
-  auto expr = map.getResult(0);
-  if (auto dim = expr.dyn_cast<AffineDimExpr>())
-    return operands[dim.getPosition()];
-  if (auto sym = expr.dyn_cast<AffineSymbolExpr>())
-    return operands[map.getNumDims() + sym.getPosition()];
-  if (auto cst = expr.dyn_cast<AffineConstantExpr>())
-    return state.getOrCreate(cst.getValue());
-  return nullptr;
-}
-
-static Value *emitOrFoldComposedAffineApply(FuncBuilder *b, Location loc,
-                                            AffineMap map,
-                                            ArrayRef<Value *> operandsRef,
-                                            PerFunctionState &state) {
-  SmallVector<Value *, 4> operands(operandsRef.begin(), operandsRef.end());
-  fullyComposeAffineMapAndOperands(&map, &operands);
-  if (auto *v = tryFold(map, operands, state))
-    return v;
-  return b->create<AffineApplyOp>(loc, map, operands);
-}
-
-static SmallVector<Value *, 4> applyMapToRangePart(FuncBuilder *b, Location loc,
-                                                   AffineMap map,
-                                                   ArrayRef<Value *> ranges,
-                                                   RangePart part,
-                                                   PerFunctionState &state) {
-  SmallVector<Value *, 4> rangeParts(ranges.size());
-  transform(llvm::make_range(ranges.begin(), ranges.end()), rangeParts.begin(),
-            [&](Value *range) { return extractRangePart(range, part); });
-
-  SmallVector<Value *, 4> res;
-  res.reserve(map.getNumResults());
-  unsigned numDims = map.getNumDims();
-  for (auto expr : map.getResults()) {
-    AffineMap map = AffineMap::get(numDims, 0, expr, {});
-    res.push_back(
-        emitOrFoldComposedAffineApply(b, loc, map, rangeParts, state));
-  }
-  return res;
-}
-
 static bool isZero(Value *v) {
   return isa_and_nonnull<ConstantIndexOp>(v->getDefiningOp()) &&
          cast<ConstantIndexOp>(v->getDefiningOp()).getValue() == 0;
@@ -146,7 +78,7 @@ static AffineMap nonZeroMap(ArrayRef<Value *> tileSizes) {
 static SmallVector<Value *, 4>
 makeTiledLoopRanges(FuncBuilder *b, Location loc, AffineMap map,
                     ArrayRef<Value *> allOpRanges, ArrayRef<Value *> tileSizes,
-                    PerFunctionState &state) {
+                    FunctionConstants &state) {
   assert(tileSizes.size() == map.getNumResults());
   // Tile sizes are in loop order by construction, apply `map` to
   // get mins/maxes/steps in loop order.
@@ -176,7 +108,7 @@ makeTiledLoopRanges(FuncBuilder *b, Location loc, AffineMap map,
     // clang-format off
       // Steps must be constant for now to abide by affine.for semantics.
       auto *newStep =
-          state.getOrCreate(
+          state.getOrCreateIndex(
               cast<ConstantIndexOp>(step->getDefiningOp()).getValue() *
               cast<ConstantIndexOp>(tileSize->getDefiningOp()).getValue());
       res.push_back(b->create<RangeOp>(loc, mins[idx], maxes[idx], newStep));
@@ -189,7 +121,7 @@ static SmallVector<Value *, 4> makeTiledViews(FuncBuilder *b, Location loc,
                                               Operation *op, ArrayRef<Value *> ivs,
                                               ArrayRef<Value *> tileSizes,
-                                              PerFunctionState &state) {
+                                              FunctionConstants &state) {
   assert(ivs.size() ==
          static_cast<size_t>(llvm::count_if(
              llvm::make_range(tileSizes.begin(), tileSizes.end()),
              [](Value *v) { return !isZero(v); })) &&
@@ -250,7 +182,7 @@ static SmallVector<Value *, 4> makeTiledViews(FuncBuilder *b, Location loc,
 }
 
 static LogicalResult tileLinalgOp(LinalgOp &op, ArrayRef<Value *> tileSizes,
-                                  PerFunctionState &state) {
+                                  FunctionConstants &state) {
   // Enforce the convention that "tiling by zero" skips tiling a particular
   // dimension. This convention is significantly simpler to handle instead of
   // adjusting affine maps to account for missing dimensions.
@@ -288,7 +220,7 @@ static LogicalResult tileLinalgOp(LinalgOp &op, ArrayRef<Value *> tileSizes,
 }
 
 static LogicalResult tileLinalgOp(LinalgOp &op, ArrayRef<int64_t> tileSizes,
-                                  PerFunctionState &state) {
+                                  FunctionConstants &state) {
   if (tileSizes.empty())
     return failure();
 
@@ -306,11 +238,11 @@ static LogicalResult tileLinalgOp(LinalgOp &op, ArrayRef<int64_t> tileSizes,
   SmallVector<Value *, 8> tileSizeValues;
   tileSizeValues.reserve(tileSizes.size());
   for (auto ts : tileSizes)
-    tileSizeValues.push_back(state.getOrCreate(ts));
+    tileSizeValues.push_back(state.getOrCreateIndex(ts));
   // Pad tile sizes with zero values to enforce our convention.
   if (tileSizeValues.size() < nLoops) {
    for (unsigned i = tileSizeValues.size(); i < nLoops; ++i)
-      tileSizeValues.push_back(state.getOrCreate(0));
+      tileSizeValues.push_back(state.getOrCreateIndex(0));
   }
 
   return tileLinalgOp(op, tileSizeValues, state);
@@ -318,14 +250,14 @@ static LogicalResult tileLinalgOp(LinalgOp &op, ArrayRef<int64_t> tileSizes,
 
 // TODO(ntv) expose as a primitive for other passes.
 static LogicalResult tileLinalgOp(Operation *op, ArrayRef<int64_t> tileSizes,
-                                  PerFunctionState &state) {
+                                  FunctionConstants &state) {
   if (auto linalgOp = dyn_cast<LinalgOp>(op))
     return tileLinalgOp(linalgOp, tileSizes, state);
   return failure();
 }
 
 static void tileLinalgOps(Function &f, ArrayRef<int64_t> tileSizes) {
-  PerFunctionState state(f);
+  FunctionConstants state(f);
   f.walk([tileSizes, &state](Operation *op) {
     if (succeeded(tileLinalgOp(op, tileSizes, state)))
       op->erase();
diff --git a/mlir/lib/Linalg/Utils/Utils.cpp b/mlir/lib/Linalg/Utils/Utils.cpp
index 6732fa1..75cddb1 100644
--- a/mlir/lib/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Linalg/Utils/Utils.cpp
@@ -138,3 +138,63 @@ Value *mlir::extractRangePart(Value *range, RangePart part) {
   }
   llvm_unreachable("need operations to extract range parts");
 }
+
+// Folding eagerly is necessary to abide by affine.for static step requirement.
+// We must propagate constants on the steps as aggressively as possible.
+// Returns nullptr if folding is not trivially feasible.
+static Value *tryFold(AffineMap map, ArrayRef<Value *> operands,
+                      FunctionConstants &state) {
+  assert(map.getNumResults() == 1 && "single result map expected");
+  auto expr = map.getResult(0);
+  if (auto dim = expr.dyn_cast<AffineDimExpr>())
+    return operands[dim.getPosition()];
+  if (auto sym = expr.dyn_cast<AffineSymbolExpr>())
+    return operands[map.getNumDims() + sym.getPosition()];
+  if (auto cst = expr.dyn_cast<AffineConstantExpr>())
+    return state.getOrCreateIndex(cst.getValue());
+  return nullptr;
+}
+
+static Value *emitOrFoldComposedAffineApply(FuncBuilder *b, Location loc,
+                                            AffineMap map,
+                                            ArrayRef<Value *> operandsRef,
+                                            FunctionConstants &state) {
+  SmallVector<Value *, 4> operands(operandsRef.begin(), operandsRef.end());
+  fullyComposeAffineMapAndOperands(&map, &operands);
+  if (auto *v = tryFold(map, operands, state))
+    return v;
+  return b->create<AffineApplyOp>(loc, map, operands);
+}
+
+SmallVector<Value *, 4> mlir::applyMapToRangePart(FuncBuilder *b, Location loc,
+                                                  AffineMap map,
+                                                  ArrayRef<Value *> ranges,
+                                                  RangePart part,
+                                                  FunctionConstants &state) {
+  SmallVector<Value *, 4> rangeParts(ranges.size());
+  llvm::transform(ranges, rangeParts.begin(),
+                  [&](Value *range) { return extractRangePart(range, part); });
+
+  SmallVector<Value *, 4> res;
+  res.reserve(map.getNumResults());
+  unsigned numDims = map.getNumDims();
+  // For each `expr` in `map`, applies the `expr` to the values extracted from
+  // ranges. If the resulting application can be folded into a Value*, the
+  // folding occurs eagerly. Otherwise, an affine.apply operation is emitted.
+  for (auto expr : map.getResults()) {
+    AffineMap map = AffineMap::get(numDims, 0, expr, {});
+    res.push_back(
+        emitOrFoldComposedAffineApply(b, loc, map, rangeParts, state));
+  }
+  return res;
+}
+
+Value *FunctionConstants::getOrCreateIndex(int64_t v) {
+  auto it = map.find(v);
+  if (it != map.end())
+    return it->second;
+  FuncBuilder builder(f);
+  edsc::ScopedContext s(builder, f.getLoc());
+  return map.insert(std::make_pair(v, edsc::intrinsics::constant_index(v)))
+      .first->getSecond();
+}
diff --git a/mlir/test/Linalg/loops.mlir b/mlir/test/Linalg/loops.mlir
new file mode 100644
index 0000000..cdedde5
--- /dev/null
+++ b/mlir/test/Linalg/loops.mlir
@@ -0,0 +1,75 @@
+// RUN: mlir-opt %s -linalg-lower-to-loops | FileCheck %s
+
+// CHECK: #[[ID:.*]] = (d0) -> (d0)
+
+func @matmul(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %I = linalg.range %c0:%arg1:%c1 : !linalg.range
+  %J = linalg.range %c0:%arg2:%c1 : !linalg.range
+  %K = linalg.range %c0:%arg3:%c1 : !linalg.range
+  %A = linalg.view %arg0[%I, %K] : !linalg.view<?x?xf32>
+  %B = linalg.view %arg0[%K, %J] : !linalg.view<?x?xf32>
+  %C = linalg.view %arg0[%I, %J] : !linalg.view<?x?xf32>
+  linalg.matmul(%A, %B, %C) : !linalg.view<?x?xf32>, !linalg.view<?x?xf32>, !linalg.view<?x?xf32>
+  return
+}
+// CHECK-LABEL: func @matmul(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+// CHECK: %[[A:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
+// CHECK: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
+// CHECK: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
+// CHECK: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%arg1) {
+// CHECK: affine.for %i1 = #[[ID]](%c0) to #[[ID]](%arg2) {
+// CHECK: affine.for %i2 = #[[ID]](%c0) to #[[ID]](%arg3) {
+// CHECK-DAG: %[[a:.*]] = linalg.load %[[A]][%i0, %i2] : !linalg.view<?x?xf32>
+// CHECK-DAG: %[[b:.*]] = linalg.load %[[B]][%i2, %i1] : !linalg.view<?x?xf32>
+// CHECK: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK: %[[c:.*]] = linalg.load %[[C]][%i0, %i1] : !linalg.view<?x?xf32>
+// CHECK: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
+// CHECK: linalg.store %[[res]], %[[C]][%i0, %i1] : !linalg.view<?x?xf32>
+
+func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %I = linalg.range %c0:%arg1:%c1 : !linalg.range
+  %J = linalg.range %c0:%arg2:%c1 : !linalg.range
+  %2 = linalg.view %arg0[%I, %J] : !linalg.view<?x?xf32>
+  %3 = linalg.view %arg0[%J] : !linalg.view<?xf32>
+  %4 = linalg.view %arg0[%I] : !linalg.view<?xf32>
+  linalg.matvec(%2, %3, %4) : !linalg.view<?x?xf32>, !linalg.view<?xf32>, !linalg.view<?xf32>
+  return
+}
+// CHECK-LABEL: func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+// CHECK: %[[A:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
+// CHECK: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
+// CHECK: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
+// CHECK: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%arg1) {
+// CHECK: affine.for %i1 = #[[ID]](%c0) to #[[ID]](%arg2) {
+// CHECK-DAG: %[[a:.*]] = linalg.load %[[A]][%i0, %i1] : !linalg.view<?x?xf32>
+// CHECK-DAG: %[[b:.*]] = linalg.load %[[B]][%i1] : !linalg.view<?xf32>
+// CHECK: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK: %[[c:.*]] = linalg.load %[[C]][%i0] : !linalg.view<?xf32>
+// CHECK: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
+// CHECK: linalg.store %[[res]], %[[C]][%i0] : !linalg.view<?xf32>
+
+func @dot(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %I = linalg.range %c0:%arg1:%c1 : !linalg.range
+  %1 = linalg.view %arg0[%I] : !linalg.view<?xf32>
+  %2 = linalg.view %arg0[%I] : !linalg.view<?xf32>
+  %3 = linalg.view %arg0[] : !linalg.view<f32>
+  linalg.dot(%1, %2, %3) : !linalg.view<?xf32>, !linalg.view<?xf32>, !linalg.view<f32>
+  return
+}
+// CHECK-LABEL: func @dot(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+// CHECK: %[[A:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
+// CHECK: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
+// CHECK: %[[C:.*]] = linalg.view %arg0[] : !linalg.view<f32>
+// CHECK: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%arg1) {
+// CHECK-DAG: %[[a:.*]] = linalg.load %[[A]][%i0] : !linalg.view<?xf32>
+// CHECK-DAG: %[[b:.*]] = linalg.load %[[B]][%i0] : !linalg.view<?xf32>
+// CHECK: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK: %[[c:.*]] = linalg.load %[[C]][] : !linalg.view<f32>
+// CHECK: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
+// CHECK: linalg.store %[[res]], %[[C]][] : !linalg.view<f32>
\ No newline at end of file
-- 
2.7.4
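
Usage note (an editor's sketch, not part of the applied diff): assuming an mlir-opt binary built with the Linalg dialect and this pass registered, the new lowering is exercised exactly as the added test's RUN line does, e.g.

  mlir-opt mlir/test/Linalg/loops.mlir -linalg-lower-to-loops

which expands each linalg.dot, linalg.matvec and linalg.matmul into the affine.for nest of linalg.load, mulf, addf and linalg.store operations that the CHECK lines above verify.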