From 0eac031fac9a3c5e3e9813de7e254f9922fbad46 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <ntv@google.com>
Date: Wed, 29 May 2019 09:12:47 -0700
Subject: [PATCH]     Add lowering of linalg.for to LLVM IR

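    This CL adds lowering of linalg.for to LLVM IR and adds an IR test.
    This also replaces the usage of affine.for with linalg.for in
    LoopNestRangeBuilder, fixes the linalg.for verifier to check the step operand
    (it previously re-checked the lower bound), and enables the LLVM IR path in
    the integration test.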

--

PiperOrigin-RevId: 250503798
---
 mlir/include/mlir/EDSC/Builders.h             | 10 +++
 mlir/include/mlir/Linalg/Utils/Utils.h        | 32 ++++++-
 mlir/lib/Linalg/IR/LinalgOps.cpp              |  7 +-
 .../Linalg/Transforms/LowerToLLVMDialect.cpp  | 86 ++++++++++++++++---
 mlir/lib/Linalg/Utils/Utils.cpp               | 35 +++++---
 mlir/test/Linalg/llvm.mlir                    | 58 +++++++++++++
 mlir/test/Linalg/loops.mlir                   | 16 ++--
 mlir/test/Linalg/tile.mlir                    | 34 ++++----
 .../linalg_integration_test.mlir              |  1 +
 9 files changed, 219 insertions(+), 60 deletions(-)
diff --git a/mlir/include/mlir/EDSC/Builders.h b/mlir/include/mlir/EDSC/Builders.h
index 4be2755c84cf..6f7a0244a726 100644
--- a/mlir/include/mlir/EDSC/Builders.h
+++ b/mlir/include/mlir/EDSC/Builders.h
@@ -364,6 +364,7 @@ struct OperationHandle : public CapturableHandle {
   /// of MLIR without duplicating the type system or the op definitions.
   template <typename Op, typename... Args>
   static OperationHandle create(Args... args);
+  template <typename Op, typename... Args> static Op createOp(Args... args);
 
   /// Generic create for a named operation.
   static OperationHandle create(StringRef name, ArrayRef<ValueHandle> operands,
@@ -435,6 +436,15 @@ OperationHandle OperationHandle::create(Args... args) {
                              .getOperation());
 }
 
+template <typename Op, typename... Args>
+Op OperationHandle::createOp(Args... args) {
+  return cast<Op>(
+      OperationHandle(ScopedContext::getBuilder()
+                          ->create<Op>(ScopedContext::getLocation(), args...)
+                          .getOperation())
+          .getOperation());
+}
+
 template <typename Op, typename... Args>
 ValueHandle ValueHandle::create(Args... args) {
   Operation *op = ScopedContext::getBuilder()
diff --git a/mlir/include/mlir/Linalg/Utils/Utils.h b/mlir/include/mlir/Linalg/Utils/Utils.h
index 26fcbc7338a2..31963b243b0a 100644
--- a/mlir/include/mlir/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Linalg/Utils/Utils.h
@@ -24,10 +24,34 @@
 namespace mlir {
 
 namespace edsc {
-/// Helper class to sugar building loop nests from ranges.
+
+/// A LoopRangeBuilder is a generic NestedBuilder for linalg.for operations.
+/// More specifically it is meant to be used as a temporary object for
+/// representing any nested MLIR construct that is "related to" an mlir::Value*
+/// (for now an induction variable).
+class LoopRangeBuilder : public NestedBuilder {
+public:
+  /// Constructs a new linalg::ForOp and captures its induction variable in
+  /// `*iv`; this pointer is the *only* way to capture the induction variable.
+  /// NOTE(review): Utils.cpp in this patch defines only the ValueHandle form.
+  LoopRangeBuilder(ValueHandle *iv, ValueHandle range);
+  LoopRangeBuilder(ValueHandle *iv, Value *range);
+
+  LoopRangeBuilder(const LoopRangeBuilder &) = delete;
+  LoopRangeBuilder(LoopRangeBuilder &&) = default;
+
+  LoopRangeBuilder &operator=(const LoopRangeBuilder &) = delete;
+  LoopRangeBuilder &operator=(LoopRangeBuilder &&) = default;
+
+  /// The only purpose of this operator is to serve as a sequence point so that
+  /// the evaluation of `fun` (which builds IR snippets in a scoped fashion) is
+  /// scoped within a LoopRangeBuilder.
+  ValueHandle operator()(std::function<void(void)> fun = nullptr);
+};
+
+/// Helper class to sugar building linalg.for loop nests from ranges.
 /// This is similar to edsc::LoopNestBuilder except it works on ranges directly.
-/// In the current implementation it produces affine.for operations and thus
-/// only admits ranges with constant steps.
+/// In the current implementation it produces linalg.for operations.
 class LoopNestRangeBuilder {
 public:
   LoopNestRangeBuilder(llvm::ArrayRef<edsc::ValueHandle *> ivs,
@@ -37,7 +61,7 @@ public:
   edsc::ValueHandle operator()(std::function<void(void)> fun = nullptr);
 
 private:
-  llvm::SmallVector<edsc::LoopBuilder, 4> loops;
+  llvm::SmallVector<LoopRangeBuilder, 4> loops;
 };
 
 } // namespace edsc
diff --git a/mlir/lib/Linalg/IR/LinalgOps.cpp b/mlir/lib/Linalg/IR/LinalgOps.cpp
index f222fcee0188..dd32d5c45049 100644
--- a/mlir/lib/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Linalg/IR/LinalgOps.cpp
@@ -158,10 +158,9 @@ LogicalResult mlir::linalg::ForOp::verify() {
     return emitOpError("lower bound operand must be an index");
   if (!getUpperBound()->getType().isa<IndexType>())
     return emitOpError("upper bound operand must be an index");
-  if (!getLowerBound()->getType().dyn_cast<IndexType>())
+  if (!getStep()->getType().dyn_cast<IndexType>())
     return emitOpError("step operand must be an index");
-  if (auto cst =
-          dyn_cast_or_null<ConstantIndexOp>(getLowerBound()->getDefiningOp()))
+  if (auto cst = dyn_cast_or_null<ConstantIndexOp>(getStep()->getDefiningOp()))
     if (cst.getValue() <= 0)
       return emitOpError("constant step operand must be positive");
 
@@ -767,7 +766,7 @@ void mlir::linalg::emitScalarImplementation(
   using linalg_store = OperationBuilder<linalg::StoreOp>;
   using IndexedValue = TemplatedIndexedValue<linalg_load, linalg_store>;
   assert(reductionIvs.size() == 1);
-  auto innermostLoop = mlir::getForInductionVarOwner(reductionIvs.back());
+  auto innermostLoop = linalg::getForInductionVarOwner(reductionIvs.back());
   auto *body = innermostLoop.getBody();
   using edsc::op::operator+;
   using edsc::op::operator*;
diff --git a/mlir/lib/Linalg/Transforms/LowerToLLVMDialect.cpp b/mlir/lib/Linalg/Transforms/LowerToLLVMDialect.cpp
index d4be38e610fc..c686af83210f 100644
--- a/mlir/lib/Linalg/Transforms/LowerToLLVMDialect.cpp
+++ b/mlir/lib/Linalg/Transforms/LowerToLLVMDialect.cpp
@@ -35,6 +35,7 @@
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/LowerAffine.h"
 #include "mlir/Transforms/Passes.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Module.h"
@@ -48,20 +49,22 @@ using namespace mlir::edsc::intrinsics;
 using namespace mlir::LLVM;
 using namespace mlir::linalg;
 
-using undef = ValueBuilder<mlir::LLVM::UndefOp>;
-using insertvalue = ValueBuilder<mlir::LLVM::InsertValueOp>;
-using extractvalue = ValueBuilder<mlir::LLVM::ExtractValueOp>;
-using constant = ValueBuilder<mlir::LLVM::ConstantOp>;
 using add = ValueBuilder<mlir::LLVM::AddOp>;
-using sub = ValueBuilder<mlir::LLVM::SubOp>;
-using mul = ValueBuilder<mlir::LLVM::MulOp>;
+using addi = ValueBuilder<mlir::AddIOp>;
 using bitcast = ValueBuilder<mlir::LLVM::BitcastOp>;
 using call = OperationBuilder<mlir::LLVM::CallOp>;
+using cmpi = ValueBuilder<mlir::CmpIOp>;
+using constant = ValueBuilder<mlir::LLVM::ConstantOp>;
+using extractvalue = ValueBuilder<mlir::LLVM::ExtractValueOp>;
 using gep = ValueBuilder<mlir::LLVM::GEPOp>;
+using insertvalue = ValueBuilder<mlir::LLVM::InsertValueOp>;
 using llvm_load = ValueBuilder<LLVM::LoadOp>;
 using llvm_store = OperationBuilder<LLVM::StoreOp>;
 using llvm_select = ValueBuilder<LLVM::SelectOp>;
-using icmp = ValueBuilder<LLVM::ICmpOp>;
+using llvm_icmp = ValueBuilder<LLVM::ICmpOp>;
+using mul = ValueBuilder<mlir::LLVM::MulOp>;
+using sub = ValueBuilder<mlir::LLVM::SubOp>;
+using undef = ValueBuilder<mlir::LLVM::UndefOp>;
 
 template <typename T>
 static LLVMType getPtrToElementType(T containerType,
@@ -384,11 +387,11 @@ public:
     Value *desc = undef(rangeDescriptorTy);
     desc = insertvalue(
         rangeDescriptorTy, desc,
-        llvm_select(int64Ty, icmp(int1Ty, SGE, min1, min2), min1, min2),
+        llvm_select(int64Ty, llvm_icmp(int1Ty, SGE, min1, min2), min1, min2),
         positionAttr(rewriter, 0));
     desc = insertvalue(
         rangeDescriptorTy, desc,
-        llvm_select(int64Ty, icmp(int1Ty, SLE, max1, max2), max1, max2),
+        llvm_select(int64Ty, llvm_icmp(int1Ty, SLE, max1, max2), max1, max2),
         positionAttr(rewriter, 1));
     // TODO(ntv): this assumes both steps are one for now. Enforce and extend.
     desc = insertvalue(rangeDescriptorTy, desc, mul(step1, step2),
@@ -548,7 +551,8 @@ public:
   }
 };
 
-// DotOp creates a new range descriptor.
+// DotOp creates a new call to the `linalg_dot` function which is assumed to
+// have been declared in the current module.
 class DotOpConversion : public LLVMOpLowering {
 public:
   explicit DotOpConversion(MLIRContext *context, LLVMTypeConverter &lowering_)
@@ -605,13 +609,67 @@ struct LowerLinalgToLLVMPass : public ModulePass<LowerLinalgToLLVMPass> {
 };
 } // namespace
 
+// Converts a `linalg.for` op to CFG form before actual conversion to the LLVM
+// dialect starts.
+static void lowerLinalgForToCFG(Function &f) {
+  // Collect all the For operations. We do this as a prepass to avoid
+  // invalidating the walker with our rewrite.
+  SmallVector<linalg::ForOp, 8> instsToRewrite;
+  f.walk<ForOp>([&](ForOp op) { instsToRewrite.push_back(op); });
+
+  for (auto forOp : llvm::reverse(instsToRewrite)) {
+    auto *op = forOp.getOperation();
+    auto loc = op->getLoc();
+    using namespace edsc::op;
+    FuncBuilder builder(op);
+    ScopedContext scope(builder, loc);
+    ValueHandle lb(forOp.getLowerBound()), ub(forOp.getUpperBound()),
+        step(forOp.getStep());
+
+    // 1. Split the block into init and end blocks, then create the condition
+    // block (which takes the `iv` block argument) and the body block.
+    auto *initBlock = op->getBlock();
+    auto *endBlock = initBlock->splitBlock(op);
+    BlockHandle conditionBlock, bodyBlock;
+    ValueHandle iv(IndexType::get(op->getContext()));
+    BlockBuilder(&conditionBlock, {&iv})();
+    BlockBuilder(&bodyBlock, {})();
+
+    // 2. Fill the condition block, whose sole purpose is to compare `iv` to the
+    // upper bound and branch to either `bodyBlock` or `endBlock`. Then add the
+    // branches into `conditionBlock` from the body and init blocks.
+    // clang-format off
+    BlockBuilder(conditionBlock, Append())([&] {
+      auto cmp = cmpi(CmpIPredicate::SGT, ub, iv);
+      cond_br(cmp, bodyBlock, {}, endBlock, {});
+    });
+    BlockBuilder(bodyBlock, Append())([&] {
+      br(conditionBlock, addi(iv, step));
+    });
+    BlockBuilder(initBlock, Append())([&] {
+      br(conditionBlock, lb);
+    });
+    // clang-format on
+
+    // 3. Move the instructions from the for loop to the body, update all uses
+    // of the induction variable and clean up.
+    auto *oldBody = forOp.getBody();
+    bodyBlock.getBlock()->getOperations().splice(
+        bodyBlock.getBlock()->begin(), oldBody->getOperations(),
+        oldBody->begin(), std::prev(oldBody->end()));
+    forOp.getInductionVar()->replaceAllUsesWith(iv);
+    forOp.erase();
+  }
+}
+
 void LowerLinalgToLLVMPass::runOnModule() {
   auto &module = getModule();
 
-  PassManager pm;
-  pm.addPass(createLowerAffinePass());
-  if (failed(pm.run(&module)))
-    signalPassFailure();
+  for (auto &f : module.getFunctions()) {
+    lowerLinalgForToCFG(f);
+    if (failed(lowerAffineConstructs(f)))
+      signalPassFailure();
+  }
 
   // Convert to the LLVM IR dialect using the converter defined above.
   OwningRewritePatternList patterns;
diff --git a/mlir/lib/Linalg/Utils/Utils.cpp b/mlir/lib/Linalg/Utils/Utils.cpp
index 4928c19abeb4..c3fea9b227ca 100644
--- a/mlir/lib/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Linalg/Utils/Utils.cpp
@@ -35,19 +35,34 @@ using namespace mlir::edsc;
 using namespace mlir::edsc::intrinsics;
 using namespace mlir::linalg;
 
+mlir::edsc::LoopRangeBuilder::LoopRangeBuilder(ValueHandle *iv,
+                                               ValueHandle range) {
+  assert(range.getType() && "expected !linalg.range type");
+  assert(range.getValue()->getDefiningOp() &&
+         "need operations to extract range parts");
+  auto rangeOp = cast<RangeOp>(range.getValue()->getDefiningOp());
+  auto lb = rangeOp.min();
+  auto ub = rangeOp.max();
+  auto step = rangeOp.step();
+  auto forOp = OperationHandle::createOp<linalg::ForOp>(lb, ub, step);
+  *iv = ValueHandle(forOp.getInductionVar());
+  auto *body = forOp.getBody();
+  enter(body, /*prev=*/1);
+}
+
+ValueHandle
+mlir::edsc::LoopRangeBuilder::operator()(std::function<void(void)> fun) {
+  if (fun)
+    fun();
+  exit();
+  return ValueHandle::null();
+}
+
 mlir::edsc::LoopNestRangeBuilder::LoopNestRangeBuilder(
     ArrayRef<ValueHandle *> ivs, ArrayRef<ValueHandle> ranges) {
+  loops.reserve(ranges.size());
   for (unsigned i = 0, e = ranges.size(); i < e; ++i) {
-    assert(ranges[i].getType() && "expected !linalg.range type");
-    assert(ranges[i].getValue()->getDefiningOp() &&
-           "need operations to extract range parts");
-    auto rangeOp = cast<RangeOp>(ranges[i].getValue()->getDefiningOp());
-    auto lb = rangeOp.min();
-    auto ub = rangeOp.max();
-    // This must be a constexpr index until we relax the affine.for constraint
-    auto step =
-        cast<ConstantIndexOp>(rangeOp.step()->getDefiningOp()).getValue();
-    loops.emplace_back(ivs[i], ValueHandle(lb), ValueHandle(ub), step);
+    loops.emplace_back(ivs[i], ranges[i]);
   }
   assert(loops.size() == ivs.size() && "Mismatch loops vs ivs size");
 }
diff --git a/mlir/test/Linalg/llvm.mlir b/mlir/test/Linalg/llvm.mlir
index 73da48dfad82..3381d8f04215 100644
--- a/mlir/test/Linalg/llvm.mlir
+++ b/mlir/test/Linalg/llvm.mlir
@@ -117,3 +117,61 @@ func @range_intersect(%arg0: !linalg.range, %arg1: !linalg.range) -> !linalg.ran
 //       CHECK:   %13 = llvm.mul %4, %5 : !llvm.i64
 //       CHECK:   %14 = llvm.insertvalue %13, %12[2] : !llvm<"{ i64, i64, i64 }">
 //       CHECK:   llvm.return %14 : !llvm<"{ i64, i64, i64 }">
+
+func @linalg_for(%arg0 : index, %arg1 : index, %arg2 : index) {
+  linalg.for %i0 = %arg0 to %arg1 step %arg2 {
+    %a = muli %i0, %arg0 : index
+  }
+  return
+}
+// CHECK-LABEL: func @linalg_for(%arg0: !llvm.i64, %arg1: !llvm.i64, %arg2: !llvm.i64) {
+//       CHECK:   llvm.br ^bb2(%arg0 : !llvm.i64)
+//       CHECK: ^bb1:   // pred: ^bb2
+//       CHECK:   llvm.return
+//       CHECK: ^bb2(%0: !llvm.i64):    // 2 preds: ^bb0, ^bb3
+//       CHECK:   %1 = llvm.icmp "sgt" %arg1, %0 : !llvm.i64
+//       CHECK:   llvm.cond_br %1, ^bb3, ^bb1
+//       CHECK: ^bb3:   // pred: ^bb2
+//       CHECK:   %2 = llvm.mul %0, %arg0 : !llvm.i64
+//       CHECK:   %3 = llvm.add %0, %arg2 : !llvm.i64
+//       CHECK:   llvm.br ^bb2(%3 : !llvm.i64)
+
+func @linalg_for_2(%arg0 : index, %arg1 : index, %arg2 : index) {
+  linalg.for %i0 = %arg0 to %arg1 step %arg2 {
+    linalg.for %i1 = %arg0 to %arg1 step %arg2 {
+      %a = muli %i0, %i1 : index
+    }
+    linalg.for %i2 = %arg0 to %arg1 step %arg2 {
+      %b = muli %i0, %i2 : index
+    }
+  }
+  return
+}
+// CHECK-LABEL: func @linalg_for_2(%arg0: !llvm.i64, %arg1: !llvm.i64, %arg2: !llvm.i64) {
+//       CHECK:   llvm.br ^bb2(%arg0 : !llvm.i64)
+//       CHECK: ^bb1:   // pred: ^bb2
+//       CHECK:   llvm.return
+//       CHECK: ^bb2(%0: !llvm.i64):    // 2 preds: ^bb0, ^bb5
+//       CHECK:   %1 = llvm.icmp "sgt" %arg1, %0 : !llvm.i64
+//       CHECK:   llvm.cond_br %1, ^bb3, ^bb1
+//       CHECK: ^bb3:   // pred: ^bb2
+//       CHECK:   llvm.br ^bb8(%arg0 : !llvm.i64)
+//       CHECK: ^bb4:   // pred: ^bb8
+//       CHECK:   llvm.br ^bb6(%arg0 : !llvm.i64)
+//       CHECK: ^bb5:   // pred: ^bb6
+//       CHECK:   %2 = llvm.add %0, %arg2 : !llvm.i64
+//       CHECK:   llvm.br ^bb2(%2 : !llvm.i64)
+//       CHECK: ^bb6(%3: !llvm.i64):    // 2 preds: ^bb4, ^bb7
+//       CHECK:   %4 = llvm.icmp "sgt" %arg1, %3 : !llvm.i64
+//       CHECK:   llvm.cond_br %4, ^bb7, ^bb5
+//       CHECK: ^bb7:   // pred: ^bb6
+//       CHECK:   %5 = llvm.mul %0, %3 : !llvm.i64
+//       CHECK:   %6 = llvm.add %3, %arg2 : !llvm.i64
+//       CHECK:   llvm.br ^bb6(%6 : !llvm.i64)
+//       CHECK: ^bb8(%7: !llvm.i64):    // 2 preds: ^bb3, ^bb9
+//       CHECK:   %8 = llvm.icmp "sgt" %arg1, %7 : !llvm.i64
+//       CHECK:   llvm.cond_br %8, ^bb9, ^bb4
+//       CHECK: ^bb9:   // pred: ^bb8
+//       CHECK:   %9 = llvm.mul %0, %7 : !llvm.i64
+//       CHECK:   %10 = llvm.add %7, %arg2 : !llvm.i64
+//       CHECK:   llvm.br ^bb8(%10 : !llvm.i64)
diff --git a/mlir/test/Linalg/loops.mlir b/mlir/test/Linalg/loops.mlir
index 5e9246f01295..fbed1c7df314 100644
--- a/mlir/test/Linalg/loops.mlir
+++ b/mlir/test/Linalg/loops.mlir
@@ -1,7 +1,5 @@
 // RUN: mlir-opt %s -linalg-lower-to-loops | FileCheck %s
 
-// CHECK: #[[ID:.*]] = (d0) -> (d0)
-
 func @matmul(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
   %c0 = constant 0 : index
   %c1 = constant 1 : index
@@ -21,9 +19,9 @@ func @matmul(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: inde
 //       CHECK: %[[M:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?x?xf32>
 //       CHECK: %[[K:.*]] = linalg.dim %[[A]], 1 : !linalg.view<?x?xf32>
 //       CHECK: %[[N:.*]] = linalg.dim %[[B]], 1 : !linalg.view<?x?xf32>
-//       CHECK: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%[[M]]) {
-//       CHECK:   affine.for %i1 = #[[ID]](%c0) to #[[ID]](%[[N]]) {
-//       CHECK:     affine.for %i2 = #[[ID]](%c0) to #[[ID]](%[[K]]) {
+//       CHECK: linalg.for %i0 = %c0 to %[[M]] step %c1 {
+//       CHECK:   linalg.for %i1 = %c0 to %[[N]] step %c1 {
+//       CHECK:     linalg.for %i2 = %c0 to %[[K]] step %c1 {
 //   CHECK-DAG:       %[[a:.*]] = linalg.load %[[A]][%i0, %i2] : !linalg.view<?x?xf32>
 //   CHECK-DAG:       %[[b:.*]] = linalg.load %[[B]][%i2, %i1] : !linalg.view<?x?xf32>
 //   CHECK-DAG:       %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -48,8 +46,8 @@ func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: inde
 //       CHECK: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
 //       CHECK: %[[M:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?x?xf32>
 //       CHECK: %[[K:.*]] = linalg.dim %[[A]], 1 : !linalg.view<?x?xf32>
-//       CHECK: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%[[M]]) {
-//       CHECK:   affine.for %i1 = #[[ID]](%c0) to #[[ID]](%[[K]]) {
+//       CHECK: linalg.for %i0 = %c0 to %[[M]] step %c1 {
+//       CHECK:   linalg.for %i1 = %c0 to %[[K]] step %c1 {
 //   CHECK-DAG:     %[[a:.*]] = linalg.load %[[A]][%i0, %i1] : !linalg.view<?x?xf32>
 //   CHECK-DAG:     %[[b:.*]] = linalg.load %[[B]][%i1] : !linalg.view<?xf32>
 //   CHECK-DAG:     %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -72,7 +70,7 @@ func @dot(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index)
 //       CHECK: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
 //       CHECK: %[[C:.*]] = linalg.view %arg0[] : !linalg.view<f32>
 //       CHECK: %[[K:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?xf32>
-//       CHECK: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%[[K]]) {
+//       CHECK: linalg.for %i0 = %c0 to %[[K]] step %c1 {
 //   CHECK-DAG:   %[[a:.*]] = linalg.load %[[A]][%i0] : !linalg.view<?xf32>
 //   CHECK-DAG:   %[[b:.*]] = linalg.load %[[B]][%i0] : !linalg.view<?xf32>
 //   CHECK-DAG:   %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
@@ -86,7 +84,7 @@ func @dot_view(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !l
 }
 // CHECK-LABEL: func @dot_view(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<f32>) {
 //       CHECK: %[[K:.*]] = linalg.dim %arg0, 0 : !linalg.view<?xf32>
-//       CHECK: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%[[K]]) {
+//       CHECK: linalg.for %i0 = %c0 to %[[K]] step %c1 {
 //   CHECK-DAG:   %[[a:.*]] = linalg.load %arg0[%i0] : !linalg.view<?xf32>
 //   CHECK-DAG:   %[[b:.*]] = linalg.load %arg1[%i0] : !linalg.view<?xf32>
 //   CHECK-DAG:   %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
diff --git a/mlir/test/Linalg/tile.mlir b/mlir/test/Linalg/tile.mlir
index bf4c656249cf..620ee086dfb8 100644
--- a/mlir/test/Linalg/tile.mlir
+++ b/mlir/test/Linalg/tile.mlir
@@ -3,13 +3,9 @@
 // RUN: mlir-opt %s -linalg-tile -linalg-tile-sizes=0,0,2 | FileCheck %s -check-prefix=TILE-002
 // RUN: mlir-opt %s -linalg-tile -linalg-tile-sizes=2,3,4 | FileCheck %s -check-prefix=TILE-234
 
-//   TILE-2-DAG: #[[ID:.*]] = (d0) -> (d0)
 //   TILE-2-DAG: #[[UB0:.*]] = (d0) -> (d0 + 2)
-//  TILE-02-DAG: #[[ID:.*]] = (d0) -> (d0)
 //  TILE-02-DAG: #[[UB0:.*]] = (d0) -> (d0 + 2)
-// TILE-002-DAG: #[[ID:.*]] = (d0) -> (d0)
 // TILE-002-DAG: #[[UB0:.*]] = (d0) -> (d0 + 2)
-// TILE-234-DAG: #[[ID:.*]] = (d0) -> (d0)
 // TILE-234-DAG: #[[UB0:.*]] = (d0) -> (d0 + 2)
 // TILE-234-DAG: #[[UB1:.*]] = (d0) -> (d0 + 3)
 // TILE-234-DAG: #[[UB2:.*]] = (d0) -> (d0 + 4)
@@ -31,7 +27,7 @@ func @matmul(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: inde
 //  TILE-2-NEXT: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
 //  TILE-2-NEXT: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
 //       TILE-2: %[[M:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?x?xf32>
-//       TILE-2: affine.for %i0 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[M]]) step 2 {
+//       TILE-2: linalg.for %i0 = %c0{{.*}} to %[[M]] step %c2 {
 //  TILE-2-NEXT:   %[[a:.*]] = affine.apply #[[UB0]](%i0)
 //  TILE-2-NEXT:   %[[M:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?x?xf32>
 //  TILE-2-NEXT:   %[[cmpuba:.*]] = cmpi "slt", %[[M]], %[[a]] : index
@@ -53,7 +49,7 @@ func @matmul(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: inde
 //  TILE-02-NEXT: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
 //  TILE-02-NEXT: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
 //       TILE-02: %[[N:.*]] = linalg.dim %[[B]], 1 : !linalg.view<?x?xf32>
-//       TILE-02: affine.for %i0 = #[[ID]](%c0) to #[[ID]](%[[N]]) step 2 {
+//       TILE-02: linalg.for %i0 = %c0 to %[[N]] step %c2 {
 //       TILE-02:   %[[b:.*]] = affine.apply #[[UB0]](%i0)
 //  TILE-02-NEXT:   %[[N:.*]] = linalg.dim %[[B]], 1 : !linalg.view<?x?xf32>
 //  TILE-02-NEXT:   %[[cmpubb:.*]] = cmpi "slt", %[[N]], %[[b]] : index
@@ -75,7 +71,7 @@ func @matmul(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: inde
 //  TILE-002-NEXT: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
 //  TILE-002-NEXT: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
 //       TILE-002: %[[K:.*]] = linalg.dim %[[A]], 1 : !linalg.view<?x?xf32>
-//       TILE-002: affine.for %i0 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[K]]) step 2 {
+//       TILE-002: linalg.for %i0 = %c0{{.*}} to %[[K]] step %c2 {
 //       TILE-002:   %[[a:.*]] = affine.apply #[[UB0]](%i0)
 //  TILE-002-NEXT:   %[[K:.*]] = linalg.dim %[[A]], 1 : !linalg.view<?x?xf32>
 //  TILE-002-NEXT:   %[[cmpuba:.*]] = cmpi "slt", %[[K]], %[[a]] : index
@@ -99,9 +95,9 @@ func @matmul(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: inde
 //       TILE-234: %[[M:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?x?xf32>
 //       TILE-234: %[[K:.*]] = linalg.dim %[[A]], 1 : !linalg.view<?x?xf32>
 //       TILE-234: %[[N:.*]] = linalg.dim %[[B]], 1 : !linalg.view<?x?xf32>
-//       TILE-234:  affine.for %i0 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[M]]) step 2 {
-//  TILE-234-NEXT:    affine.for %i1 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[N]]) step 3 {
-//  TILE-234-NEXT:      affine.for %i2 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[K]]) step 4 {
+//       TILE-234:  linalg.for %i0 = %c0{{.*}} to %[[M]] step %c2 {
+//  TILE-234-NEXT:    linalg.for %i1 = %c0{{.*}} to %[[N]] step %c3 {
+//  TILE-234-NEXT:      linalg.for %i2 = %c0{{.*}} to %[[K]] step %c4 {
 //  TILE-234-NEXT:        %[[ai:.*]]  = affine.apply #[[UB0]](%i0)
 //  TILE-234-NEXT:        %[[M:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?x?xf32>
 //  TILE-234-NEXT:        %[[cmpubai:.*]] = cmpi "slt", %[[M]], %[[ai]] : index
@@ -159,7 +155,7 @@ func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: inde
 //  TILE-2-NEXT: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
 //  TILE-2-NEXT: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
 //       TILE-2: %[[M:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?x?xf32>
-//       TILE-2: affine.for %i0 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[M]]) step 2 {
+//       TILE-2: linalg.for %i0 = %c0{{.*}} to %[[M]] step %c2 {
 //  TILE-2-NEXT:   %[[a:.*]] = affine.apply #[[UB0]](%i0)
 //  TILE-2-NEXT:   %[[M:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?x?xf32>
 //  TILE-2-NEXT:   %[[cmpuba:.*]] = cmpi "slt", %[[M]], %[[a]] : index
@@ -181,7 +177,7 @@ func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: inde
 //  TILE-02-NEXT: %[[B:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
 //  TILE-02-NEXT: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
 //       TILE-02: %[[K:.*]] = linalg.dim %[[A]], 1 : !linalg.view<?x?xf32>
-//       TILE-02: affine.for %i0 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[K]]) step 2 {
+//       TILE-02: linalg.for %i0 = %c0{{.*}} to %[[K]] step %c2 {
 //       TILE-02:   %[[a:.*]] = affine.apply #[[UB0]](%i0)
 //  TILE-02-NEXT:   %[[K:.*]] = linalg.dim %[[A]], 1 : !linalg.view<?x?xf32>
 //  TILE-02-NEXT:   %[[cmpuba:.*]] = cmpi "slt", %[[K]], %[[a]] : index
@@ -199,7 +195,7 @@ func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: inde
 //       TILE-02:   linalg.matvec(%[[sAj]], %[[sBj]], %[[C]]) : !linalg.view<?x?xf32>, !linalg.view<?xf32>, !linalg.view<?xf32>
 
 // TILE-002-LABEL: func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
-//   TILE-002-NOT: affine.for
+//   TILE-002-NOT: linalg.for
 
 // TILE-234-LABEL: func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: index) {
 //       TILE-234: %[[A:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?x?xf32>
@@ -207,8 +203,8 @@ func @matvec(%arg0: !linalg.buffer<f32>, %arg1: index, %arg2: index, %arg3: inde
 //  TILE-234-NEXT: %[[C:.*]] = linalg.view %arg0[{{.*}}] : !linalg.view<?xf32>
 //       TILE-234: %[[M:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?x?xf32>
 //       TILE-234: %[[K:.*]] = linalg.dim %[[A]], 1 : !linalg.view<?x?xf32>
-//       TILE-234:  affine.for %i0 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[M]]) step 2 {
-//  TILE-234-NEXT:    affine.for %i1 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[K]]) step 3 {
+//       TILE-234:  linalg.for %i0 = %c0{{.*}} to %[[M]] step %c2 {
+//  TILE-234-NEXT:    linalg.for %i1 = %c0{{.*}} to %[[K]] step %c3 {
 //  TILE-234-NEXT:      %[[ai:.*]] = affine.apply #[[UB0]](%i0)
 //  TILE-234-NEXT:      %[[M:.*]] = linalg.dim %[[A]], 0 : !linalg.view<?x?xf32>
 //  TILE-234-NEXT:      %[[cmpubai:.*]] = cmpi "slt", %[[M]], %[[ai]] : index
@@ -244,7 +240,7 @@ func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg
 }
 // TILE-2-LABEL: func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<f32>) {
 //       TILE-2: %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?xf32>
-//       TILE-2: affine.for %i0 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[M]]) step 2 {
+//       TILE-2: linalg.for %i0 = %c0{{.*}} to %[[M]] step %c2 {
 //  TILE-2-NEXT:   %[[a:.*]] = affine.apply #[[UB0]](%i0)
 //  TILE-2-NEXT:   %[[M:.*]] = linalg.dim %arg0, 0 : !linalg.view<?xf32>
 //  TILE-2-NEXT:   %[[cmpuba:.*]] = cmpi "slt", %[[M]], %[[a]] : index
@@ -262,14 +258,14 @@ func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg
 //  TILE-2-NEXT:   linalg.dot(%[[sAi]], %[[sBi]], {{.*}}) : !linalg.view<?xf32>, !linalg.view<?xf32>, !linalg.view<f32>
 
 // TILE-02-LABEL: func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<f32>) {
-//   TILE-02-NOT: affine.for
+//   TILE-02-NOT: linalg.for
 
 // TILE-002-LABEL: func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<f32>) {
-//   TILE-002-NOT: affine.for
+//   TILE-002-NOT: linalg.for
 
 // TILE-234-LABEL: func @dot(%arg0: !linalg.view<?xf32>, %arg1: !linalg.view<?xf32>, %arg2: !linalg.view<f32>) {
 //       TILE-234: %[[K:.*]] = linalg.dim %arg0, 0 : !linalg.view<?xf32>
-//       TILE-234:  affine.for %i0 = #[[ID]](%c0{{.*}}) to #[[ID]](%[[K]]) step 2 {
+//       TILE-234:  linalg.for %i0 = %c0{{.*}} to %[[K]] step %c2 {
 //  TILE-234-NEXT:    %[[a:.*]] = affine.apply #[[UB0]](%i0)
 //  TILE-234-NEXT:    %[[K:.*]] = linalg.dim %arg0, 0 : !linalg.view<?xf32>
 //  TILE-234-NEXT:    %[[cmpuba:.*]] = cmpi "slt", %[[K]], %[[a]] : index
diff --git a/mlir/test/mlir-cpu-runner/linalg_integration_test.mlir b/mlir/test/mlir-cpu-runner/linalg_integration_test.mlir
index 4ecd64613024..1be163f1c79d 100644
--- a/mlir/test/mlir-cpu-runner/linalg_integration_test.mlir
+++ b/mlir/test/mlir-cpu-runner/linalg_integration_test.mlir
@@ -1,4 +1,5 @@
 // RUN: mlir-opt %s -linalg-lower-to-llvm-dialect | mlir-cpu-runner -e entry1 -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libsdot%shlibext | FileCheck %s
+// RUN: mlir-opt %s -linalg-lower-to-loops -linalg-lower-to-llvm-dialect | mlir-cpu-runner -e entry1 -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libsdot%shlibext | FileCheck %s
 
 func @cblas_sdot(!llvm.i64, !llvm<"float*">, !llvm.i64, !llvm<"float*">, !llvm.i64) -> !llvm.float
 
-- 
2.34.1