};
};
+void emitScalarImplementation(llvm::ArrayRef<Value *> parallelIvs,
+ llvm::ArrayRef<Value *> reductionIvs,
+ LinalgOp &linalgOp);
+
} // namespace linalg
} // namespace mlir
let description = [{
The "linalg.buffer_size" operation takes a linalg.buffer and returns an
"index". For example:
-
+
%0 = linalg.buffer_size %arg0 : !linalg.buffer<f32>
}];
// Fully specified by traits.
}]>];
}
-#endif // LINALG_OPS
\ No newline at end of file
+#endif // LINALG_OPS
namespace linalg {
FunctionPassBase *createLinalgTilingPass(ArrayRef<int64_t> tileSizes = {});
+FunctionPassBase *createLowerLinalgToLoopsPass();
+
ModulePassBase *createLowerLinalgToLLVMPass();
} // namespace linalg
} // namespace mlir
} // namespace edsc
+/// Helper class that memoizes the creation of index constants within a given
+/// function, so repeated requests for the same value reuse a single constant.
+class FunctionConstants {
+public:
+ FunctionConstants(Function &f) : f(f) {}
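+ /// Returns the index-typed constant for `v`, creating it on first use and
+ /// reusing the memoized value on subsequent calls.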
+ Value *getOrCreateIndex(int64_t v);
+
+private:
+ Function &f;
+ llvm::SmallDenseMap<int64_t, Value *> map;
+};
+
/// Abstracts away the extraction of values of RangeType from the actual op
/// implementation. For each operand of `op`:
/// 1. If it is of RangeType, appends it to the result.
enum class RangePart { Min = 0, Max, Step };
Value *extractRangePart(Value *range, RangePart part);
+/// Returns the values obtained by applying `map` to the list of range parts
+/// extracted from `ranges`.
+SmallVector<Value *, 4> applyMapToRangePart(FuncBuilder *b, Location loc,
+ AffineMap map,
+ ArrayRef<Value *> ranges,
+ RangePart part,
+ FunctionConstants &state);
} // namespace mlir
#endif // MLIR_LINALG_UTILS_H_
//===----------------------------------------------------------------------===//
#include "mlir/Linalg/IR/LinalgOps.h"
+#include "mlir/EDSC/Helpers.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/Support/STLExtras.h"
using namespace mlir;
+using namespace mlir::edsc;
+using namespace mlir::edsc::intrinsics;
using namespace mlir::linalg;
//////////////////////////////////////////////////////////////////////////////
}
namespace mlir {
+namespace linalg {
#define GET_OP_CLASSES
#include "mlir/Linalg/IR/LinalgOps.cpp.inc"
#define GET_OP_CLASSES
#include "mlir/Linalg/IR/LinalgLibraryOps.cpp.inc"
+} // namespace linalg
} // namespace mlir
// Ideally this should all be Tablegen'd but there is no good story for
AffineMap::get(2, 0, {j}, {}),
AffineMap::get(2, 0, {i}, {})};
if (isa<MatmulOp>(op))
- // A(i, r_j) * B(r_j) -> C(i)
+ // A(i, r_k) * B(r_k, j) -> C(i, j)
return SmallVector<AffineMap, 4>{AffineMap::get(3, 0, {i, k}, {}),
AffineMap::get(3, 0, {k, j}, {}),
AffineMap::get(3, 0, {i, j}, {})};
llvm_unreachable("Missing loopToOperandRangesMaps for op");
}
+
+// Ideally this should all be Tablegen'd but there is no good story for op
+// expansion directly in MLIR for now.
+void mlir::linalg::emitScalarImplementation(
+ llvm::ArrayRef<Value *> parallelIvs, llvm::ArrayRef<Value *> reductionIvs,
+ LinalgOp &linalgOp) {
+ using linalg_load = ValueBuilder<linalg::LoadOp>;
+ using linalg_store = OperationBuilder<linalg::StoreOp>;
+ using IndexedValue = TemplatedIndexedValue<linalg_load, linalg_store>;
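+ // An IndexedValue emits a linalg.load when read and a linalg.store when
+ // assigned to, so the scalar bodies below read like the textbook formulas.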
+ assert(reductionIvs.size() == 1 && "expected a single reduction iv");
+ auto innermostLoop = getForInductionVarOwner(reductionIvs.back());
+ auto *body = innermostLoop.getBody();
+ using edsc::op::operator+;
+ using edsc::op::operator*;
+ using edsc::op::operator==;
+ using edsc::intrinsics::select;
+
+ // Insert just before the affine.terminator at the end of the loop body.
+ FuncBuilder b(body, std::prev(body->end(), 1));
+ ScopedContext scope(b, innermostLoop.getLoc());
+ auto *op = linalgOp.getOperation();
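+ // Emit the scalar multiply-accumulate body corresponding to the library op.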
+ if (isa<DotOp>(op)) {
+ IndexHandle r_i(reductionIvs[0]);
+ IndexedValue A(op->getOperand(0)), B(op->getOperand(1)),
+ C(op->getOperand(2));
+ C() = C() + A(r_i) * B(r_i);
+ return;
+ }
+ if (isa<MatvecOp>(op)) {
+ IndexHandle i(parallelIvs[0]), r_j(reductionIvs[0]);
+ IndexedValue A(op->getOperand(0)), B(op->getOperand(1)),
+ C(op->getOperand(2));
+ C(i) = C(i) + A(i, r_j) * B(r_j);
+ return;
+ }
+ if (isa<MatmulOp>(op)) {
+ IndexHandle i(parallelIvs[0]), j(parallelIvs[1]), r_k(reductionIvs[0]);
+ IndexedValue A(op->getOperand(0)), B(op->getOperand(1)),
+ C(op->getOperand(2));
+ C(i, j) = C(i, j) + A(i, r_k) * B(r_k, j);
+ return;
+ }
+ llvm_unreachable("Missing emitScalarImplementation for op");
+}
--- /dev/null
+//===- LowerToLoops.cpp - Conversion from Linalg library ops to loops -----===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#include "mlir/EDSC/Helpers.h"
+#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/AffineMap.h"
+#include "mlir/IR/OpImplementation.h"
+#include "mlir/Linalg/IR/LinalgOps.h"
+#include "mlir/Linalg/IR/LinalgTypes.h"
+#include "mlir/Linalg/Passes.h"
+#include "mlir/Linalg/Utils/Utils.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/STLExtras.h"
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace mlir;
+using namespace mlir::edsc;
+using namespace mlir::edsc::intrinsics;
+using namespace mlir::linalg;
+using namespace llvm;
+
+// Creates a number of ranges equal to the number of results in `map`.
+// The returned ranges correspond to the loop ranges, in the proper order, for
+// which new loops will be created.
+static SmallVector<Value *, 4> makeLoopRanges(FuncBuilder *b, Location loc,
+ AffineMap map,
+ ArrayRef<Value *> allOpRanges,
+ FunctionConstants &state) {
+ // Apply `map` to get mins/maxes/steps in loop order.
+ auto mins =
+ applyMapToRangePart(b, loc, map, allOpRanges, RangePart::Min, state);
+ auto maxes =
+ applyMapToRangePart(b, loc, map, allOpRanges, RangePart::Max, state);
+ auto steps =
+ applyMapToRangePart(b, loc, map, allOpRanges, RangePart::Step, state);
+
+ // Create a new linalg.range for each result of `map` from the computed
+ // mins/maxes/steps.
+ SmallVector<Value *, 4> res;
+ for (unsigned idx = 0, e = steps.size(); idx < e; ++idx)
+ res.push_back(b->create<RangeOp>(loc, mins[idx], maxes[idx], steps[idx]));
+ return res;
+}
+
+static void emitLinalgOpAsLoops(LinalgOp &linalgOp, FunctionConstants &state) {
+ FuncBuilder b(linalgOp.getOperation());
+ ScopedContext scope(b, linalgOp.getOperation()->getLoc());
+ auto loopRanges = makeLoopRanges(
+ scope.getBuilder(), scope.getLocation(),
+ // The flattened loopToOperandRangesMaps is expected to be an invertible
+ // permutation map (which is asserted in the inverse calculation).
+ inversePermutation(concatAffineMaps(loopToOperandRangesMaps(linalgOp))),
+ getRanges(linalgOp.getOperation()), state);
+
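+ // Create induction variable handles for the parallel and reduction loops;
+ // the loop builders below bind them to the induction variables they create.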
+ SmallVector<IndexHandle, 4> parallelIvs(linalgOp.getNumParallelLoops());
+ SmallVector<IndexHandle, 4> reductionIvs(linalgOp.getNumReductionLoops());
+ auto pivs = IndexHandle::makeIndexHandlePointers(parallelIvs);
+ auto rivs = IndexHandle::makeIndexHandlePointers(reductionIvs);
+ assert(loopRanges.size() == pivs.size() + rivs.size());
+
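+ // Build the parallel loop nest enclosing the reduction loop nest and emit
+ // the scalar implementation of the op in the innermost loop body.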
+ // clang-format off
+ ArrayRef<Value *> ranges(loopRanges);
+ LoopNestRangeBuilder(pivs, ranges.take_front(pivs.size()))({
+ LoopNestRangeBuilder(rivs, ranges.take_back(rivs.size()))({
+ [&linalgOp, ¶llelIvs, &reductionIvs]() {
+ SmallVector<mlir::Value *, 4> parallel(
+ parallelIvs.begin(), parallelIvs.end());
+ SmallVector<mlir::Value *, 4> reduction(
+ reductionIvs.begin(), reductionIvs.end());
+ emitScalarImplementation(parallel, reduction, linalgOp);
+ /// NestedBuilders expect handles, so we return an IndexHandle.
+ return IndexHandle();
+ }()
+ })
+ });
+ // clang-format on
+}
+
+namespace {
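+/// Pass that rewrites every Linalg library op in a function into an explicit
+/// loop nest and erases the original op.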
+struct LowerLinalgToLoopsPass : public FunctionPass<LowerLinalgToLoopsPass> {
+ void runOnFunction();
+};
+} // namespace
+
+void LowerLinalgToLoopsPass::runOnFunction() {
+ auto &f = getFunction();
+ FunctionConstants state(f);
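+ // Lower each linalg op to loops and erase the original op once rewritten.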
+ f.walk([&state](Operation *op) {
+ if (auto linalgOp = dyn_cast<LinalgOp>(op)) {
+ emitLinalgOpAsLoops(linalgOp, state);
+ op->erase();
+ }
+ });
+}
+
+FunctionPassBase *mlir::linalg::createLowerLinalgToLoopsPass() {
+ return new LowerLinalgToLoopsPass();
+}
+
+static PassRegistration<LowerLinalgToLoopsPass>
+ pass("linalg-lower-to-loops",
+ "Lower the operations from the linalg dialect into loops");
llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated,
llvm::cl::cat(clOptionsCategory));
-namespace {
-class PerFunctionState {
-public:
- PerFunctionState(Function &f) : f(f) {}
-
- Value *getOrCreate(int64_t v) {
- auto it = map.find(v);
- if (it != map.end())
- return it->second;
- FuncBuilder builder(f);
- edsc::ScopedContext s(builder, f.getLoc());
- return map.insert(std::make_pair(v, edsc::intrinsics::constant_index(v)))
- .first->getSecond();
- }
-
-private:
- Function &f;
- SmallDenseMap<int64_t, Value *> map;
-};
-} // namespace
-
-// Folding eagerly is necessary to abide by affine.for static step requirement.
-// We must propagate constants on the steps as aggressively as possible.
-// Returns nullptr if folding is not trivially feasible.
-static Value *tryFold(AffineMap map, ArrayRef<Value *> operands,
- PerFunctionState &state) {
- assert(map.getNumResults() == 1 && "single result map expected");
- auto expr = map.getResult(0);
- if (auto dim = expr.dyn_cast<AffineDimExpr>())
- return operands[dim.getPosition()];
- if (auto sym = expr.dyn_cast<AffineSymbolExpr>())
- return operands[map.getNumDims() + sym.getPosition()];
- if (auto cst = expr.dyn_cast<AffineConstantExpr>())
- return state.getOrCreate(cst.getValue());
- return nullptr;
-}
-
-static Value *emitOrFoldComposedAffineApply(FuncBuilder *b, Location loc,
- AffineMap map,
- ArrayRef<Value *> operandsRef,
- PerFunctionState &state) {
- SmallVector<Value *, 4> operands(operandsRef.begin(), operandsRef.end());
- fullyComposeAffineMapAndOperands(&map, &operands);
- if (auto *v = tryFold(map, operands, state))
- return v;
- return b->create<AffineApplyOp>(loc, map, operands);
-}
-
-static SmallVector<Value *, 4> applyMapToRangePart(FuncBuilder *b, Location loc,
- AffineMap map,
- ArrayRef<Value *> ranges,
- RangePart part,
- PerFunctionState &state) {
- SmallVector<Value *, 4> rangeParts(ranges.size());
- transform(llvm::make_range(ranges.begin(), ranges.end()), rangeParts.begin(),
- [&](Value *range) { return extractRangePart(range, part); });
-
- SmallVector<Value *, 4> res;
- res.reserve(map.getNumResults());
- unsigned numDims = map.getNumDims();
- for (auto expr : map.getResults()) {
- AffineMap map = AffineMap::get(numDims, 0, expr, {});
- res.push_back(
- emitOrFoldComposedAffineApply(b, loc, map, rangeParts, state));
- }
- return res;
-}
-
static bool isZero(Value *v) {
return isa_and_nonnull<ConstantIndexOp>(v->getDefiningOp()) &&
cast<ConstantIndexOp>(v->getDefiningOp()).getValue() == 0;
static SmallVector<Value *, 4>
makeTiledLoopRanges(FuncBuilder *b, Location loc, AffineMap map,
ArrayRef<Value *> allOpRanges, ArrayRef<Value *> tileSizes,
- PerFunctionState &state) {
+ FunctionConstants &state) {
assert(tileSizes.size() == map.getNumResults());
// Tile sizes are in loop order by construction, apply `map` to
// get mins/maxes/steps in loop order.
// clang-format off
// Steps must be constant for now to abide by affine.for semantics.
auto *newStep =
- state.getOrCreate(
+ state.getOrCreateIndex(
cast<ConstantIndexOp>(step->getDefiningOp()).getValue() *
cast<ConstantIndexOp>(tileSize->getDefiningOp()).getValue());
res.push_back(b->create<RangeOp>(loc, mins[idx], maxes[idx], newStep));
Operation *op,
ArrayRef<Value *> ivs,
ArrayRef<Value *> tileSizes,
- PerFunctionState &state) {
+ FunctionConstants &state) {
assert(ivs.size() == static_cast<size_t>(llvm::count_if(
llvm::make_range(tileSizes.begin(), tileSizes.end()),
[](Value *v) { return !isZero(v); })) &&
}
static LogicalResult tileLinalgOp(LinalgOp &op, ArrayRef<Value *> tileSizes,
- PerFunctionState &state) {
+ FunctionConstants &state) {
// Enforce the convention that "tiling by zero" skips tiling a particular
// dimension. This convention is significantly simpler to handle instead of
// adjusting affine maps to account for missing dimensions.
}
static LogicalResult tileLinalgOp(LinalgOp &op, ArrayRef<int64_t> tileSizes,
- PerFunctionState &state) {
+ FunctionConstants &state) {
if (tileSizes.empty())
return failure();
SmallVector<Value *, 8> tileSizeValues;
tileSizeValues.reserve(tileSizes.size());
for (auto ts : tileSizes)
- tileSizeValues.push_back(state.getOrCreate(ts));
+ tileSizeValues.push_back(state.getOrCreateIndex(ts));
// Pad tile sizes with zero values to enforce our convention.
if (tileSizeValues.size() < nLoops) {
for (unsigned i = tileSizeValues.size(); i < nLoops; ++i)
- tileSizeValues.push_back(state.getOrCreate(0));
+ tileSizeValues.push_back(state.getOrCreateIndex(0));
}
return tileLinalgOp(op, tileSizeValues, state);
// TODO(ntv) expose as a primitive for other passes.
static LogicalResult tileLinalgOp(Operation *op, ArrayRef<int64_t> tileSizes,
- PerFunctionState &state) {
+ FunctionConstants &state) {
if (auto linalgOp = dyn_cast<LinalgOp>(op))
return tileLinalgOp(linalgOp, tileSizes, state);
return failure();
}
static void tileLinalgOps(Function &f, ArrayRef<int64_t> tileSizes) {
- PerFunctionState state(f);
+ FunctionConstants state(f);
f.walk([tileSizes, &state](Operation *op) {
if (succeeded(tileLinalgOp(op, tileSizes, state)))
op->erase();
}
llvm_unreachable("need operations to extract range parts");
}
+
+// Folding eagerly is necessary to abide by the static step requirement of
+// affine.for. We must propagate constants on the steps as aggressively as
+// possible.
+// Returns nullptr if folding is not trivially feasible.
+static Value *tryFold(AffineMap map, ArrayRef<Value *> operands,
+ FunctionConstants &state) {
+ assert(map.getNumResults() == 1 && "single result map expected");
+ auto expr = map.getResult(0);
+ if (auto dim = expr.dyn_cast<AffineDimExpr>())
+ return operands[dim.getPosition()];
+ if (auto sym = expr.dyn_cast<AffineSymbolExpr>())
+ return operands[map.getNumDims() + sym.getPosition()];
+ if (auto cst = expr.dyn_cast<AffineConstantExpr>())
+ return state.getOrCreateIndex(cst.getValue());
+ return nullptr;
+}
+
+static Value *emitOrFoldComposedAffineApply(FuncBuilder *b, Location loc,
+ AffineMap map,
+ ArrayRef<Value *> operandsRef,
+ FunctionConstants &state) {
+ SmallVector<Value *, 4> operands(operandsRef.begin(), operandsRef.end());
+ fullyComposeAffineMapAndOperands(&map, &operands);
+ if (auto *v = tryFold(map, operands, state))
+ return v;
+ return b->create<AffineApplyOp>(loc, map, operands);
+}
+
+SmallVector<Value *, 4> mlir::applyMapToRangePart(FuncBuilder *b, Location loc,
+ AffineMap map,
+ ArrayRef<Value *> ranges,
+ RangePart part,
+ FunctionConstants &state) {
+ SmallVector<Value *, 4> rangeParts(ranges.size());
+ llvm::transform(ranges, rangeParts.begin(),
+ [&](Value *range) { return extractRangePart(range, part); });
+
+ SmallVector<Value *, 4> res;
+ res.reserve(map.getNumResults());
+ unsigned numDims = map.getNumDims();
+ // For each `expr` in `map`, applies the `expr` to the values extracted from
+ // ranges. If the resulting application can be folded into a Value*, the
+ // folding occurs eagerly. Otherwise, an affine.apply operation is emitted.
+ for (auto expr : map.getResults()) {
+ AffineMap singleResMap = AffineMap::get(numDims, 0, expr, {});
+ res.push_back(
+ emitOrFoldComposedAffineApply(b, loc, singleResMap, rangeParts, state));
+ }
+ return res;
+}
+
+Value *FunctionConstants::getOrCreateIndex(int64_t v) {
+ auto it = map.find(v);
+ if (it != map.end())
+ return it->second;
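+ // Not yet created: materialize the constant once and memoize it for reuse.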
+ FuncBuilder builder(f);
+ edsc::ScopedContext s(builder, f.getLoc());
+ return map.insert(std::make_pair(v, edsc::intrinsics::constant_index(v)))
+ .first->getSecond();
+}
--- /dev/null
+// RUN: mlir-opt %s -linalg-lower-to-loops | FileCheck %s
+
+// CHECK: #[[ID:.*]] = (d0) -> (d0)
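+// The loop bounds below apply the identity map #[[ID]] to the min/max values
+// of each linalg.range.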
+
+func @matmul(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+ %c0 = constant 0 : index
+ %c1 = constant 1 : index
+ %I = linalg.range %c0:%arg1:%c1 : !linalg.range
+ %J = linalg.range %c0:%arg2:%c1 : !linalg.range
+ %K = linalg.range %c0:%arg3:%c1 : !linalg.range
+ %A = linalg.view %arg0[%I, %K] : !linalg.view<?x?xf32>
+ %B = linalg.view %arg0[%K, %J] : !linalg.view<?x?xf32>
+ %C = linalg.view %arg0[%I, %J] : !linalg.view<?x?xf32>
+ linalg.matmul(%A, %B, %C) : !linalg.view<?x?xf32>, !linalg.view<?x?xf32>, !linalg.view<?x?xf32>
+ return
+}
+// CHECK-LABEL: func @matmul(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+// CHECK: %[[A:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
+// CHECK: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
+// CHECK: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
+// CHECK: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%arg1) {
+// CHECK: affine.for %i1 = #[[ID]](%c0) to #[[ID]](%arg2) {
+// CHECK: affine.for %i2 = #[[ID]](%c0) to #[[ID]](%arg3) {
+// CHECK-DAG: %[[a:.*]] = linalg.load %[[A]][%i0, %i2] : !linalg.view<?x?xf32>
+// CHECK-DAG: %[[b:.*]] = linalg.load %[[B]][%i2, %i1] : !linalg.view<?x?xf32>
+// CHECK: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK: %[[c:.*]] = linalg.load %[[C]][%i0, %i1] : !linalg.view<?x?xf32>
+// CHECK: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
+// CHECK: linalg.store %[[res]], %[[C]][%i0, %i1] : !linalg.view<?x?xf32>
+
+func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+ %c0 = constant 0 : index
+ %c1 = constant 1 : index
+ %I = linalg.range %c0:%arg1:%c1 : !linalg.range
+ %J = linalg.range %c0:%arg2:%c1 : !linalg.range
+ %2 = linalg.view %arg0[%I, %J] : !linalg.view<?x?xf32>
+ %3 = linalg.view %arg0[%J] : !linalg.view<?xf32>
+ %4 = linalg.view %arg0[%I] : !linalg.view<?xf32>
+ linalg.matvec(%2, %3, %4) : !linalg.view<?x?xf32>, !linalg.view<?xf32>, !linalg.view<?xf32>
+ return
+}
+// CHECK-LABEL: func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+// CHECK: %[[A:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
+// CHECK: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
+// CHECK: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
+// CHECK: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%arg1) {
+// CHECK: affine.for %i1 = #[[ID]](%c0) to #[[ID]](%arg2) {
+// CHECK-DAG: %[[a:.*]] = linalg.load %[[A]][%i0, %i1] : !linalg.view<?x?xf32>
+// CHECK-DAG: %[[b:.*]] = linalg.load %[[B]][%i1] : !linalg.view<?xf32>
+// CHECK: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK: %[[c:.*]] = linalg.load %[[C]][%i0] : !linalg.view<?xf32>
+// CHECK: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
+// CHECK: linalg.store %[[res]], %[[C]][%i0] : !linalg.view<?xf32>
+
+func @dot(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+ %c0 = constant 0 : index
+ %c1 = constant 1 : index
+ %I = linalg.range %c0:%arg1:%c1 : !linalg.range
+ %1 = linalg.view %arg0[%I] : !linalg.view<?xf32>
+ %2 = linalg.view %arg0[%I] : !linalg.view<?xf32>
+ %3 = linalg.view %arg0[] : !linalg.view<f32>
+ linalg.dot(%1, %2, %3) : !linalg.view<?xf32>, !linalg.view<?xf32>, !linalg.view<f32>
+ return
+}
+// CHECK-LABEL: func @dot(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
+// CHECK: %[[A:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
+// CHECK: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
+// CHECK: %[[C:.*]] = linalg.view %arg0[] : !linalg.view<f32>
+// CHECK: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%arg1) {
+// CHECK-DAG: %[[a:.*]] = linalg.load %[[A]][%i0] : !linalg.view<?xf32>
+// CHECK-DAG: %[[b:.*]] = linalg.load %[[B]][%i0] : !linalg.view<?xf32>
+// CHECK: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK: %[[c:.*]] = linalg.load %[[C]][] : !linalg.view<f32>
+// CHECK: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
+// CHECK: linalg.store %[[res]], %[[C]][] : !linalg.view<f32>
\ No newline at end of file