From 40d8e4d3f992c5749c949b493e2d0866bc82a451 Mon Sep 17 00:00:00 2001
From: Alex Zinenko <zinenko@google.com>
Date: Mon, 15 Mar 2021 10:26:41 +0100
Subject: [PATCH] Revert "[Canonicalizer] Process regions top-down instead of
 bottom up & reuse existing constants."

This reverts commit b5d9a3c92358349d5444ab28de8ab5b2bee33a01.

The commit introduced a memory error in canonicalization/operation
walking that is exposed when compiled with ASAN. It leads to crashes in
some "release" configurations.
---
 mlir/include/mlir/Transforms/FoldUtils.h           |  6 +-
 mlir/lib/Transforms/Utils/FoldUtils.cpp            | 97 +++-------------------
 .../Utils/GreedyPatternRewriteDriver.cpp           | 29 ++-----
 .../Conversion/StandardToSPIRV/legalization.mlir   |  2 +-
 .../Conversion/VectorToSCF/vector-to-loops.mlir    | 13 +--
 mlir/test/Dialect/Affine/canonicalize.mlir         |  6 +-
 mlir/test/Dialect/Linalg/sparse_2d.mlir            |  2 +-
 mlir/test/Dialect/Linalg/transform-patterns.mlir   |  2 +-
 mlir/test/Dialect/Quant/convert-const.mlir         |  6 +-
 mlir/test/Dialect/SCF/canonicalize.mlir            | 10 +--
 .../Dialect/SPIRV/Transforms/canonicalize.mlir     | 63 ++++++--------
 mlir/test/Dialect/Tensor/canonicalize.mlir         |  8 +-
 mlir/test/Dialect/Vector/canonicalize.mlir         | 12 +--
 .../Dialect/Vector/vector-contract-transforms.mlir |  7 +-
 .../Dialect/Vector/vector-flat-transforms.mlir     | 10 +--
 .../Vector/vector-transfer-full-partial-split.mlir | 12 +--
 .../Dialect/Vector/vector-transfer-unroll.mlir     | 12 +--
 mlir/test/Dialect/Vector/vector-transforms.mlir    |  6 +-
 mlir/test/Transforms/canonicalize.mlir             | 12 +--
 mlir/test/Transforms/parallel-loop-collapsing.mlir | 12 +--
 .../single-parallel-loop-collapsing.mlir           | 14 ++--
 mlir/test/Transforms/test-canonicalize.mlir        | 19 -----
 mlir/test/mlir-tblgen/pattern.mlir                 |  6 +-
 23 files changed, 119 insertions(+), 247 deletions(-)

diff --git a/mlir/include/mlir/Transforms/FoldUtils.h b/mlir/include/mlir/Transforms/FoldUtils.h
index c31ac15..ad406cb 100644
--- a/mlir/include/mlir/Transforms/FoldUtils.h
+++ b/mlir/include/mlir/Transforms/FoldUtils.h
@@ -23,6 +23,7 @@ namespace mlir {
 class Operation;
 class Value;
 
+
 //===--------------------------------------------------------------------===//
 // OperationFolder
 //===--------------------------------------------------------------------===//
@@ -33,11 +34,6 @@ class OperationFolder {
 public:
   OperationFolder(MLIRContext *ctx) : interfaces(ctx) {}
 
-  /// Scan the specified region for constants that can be used in folding,
-  /// moving them to the entry block and adding them to our known-constants
-  /// table.
-  void processExistingConstants(Region &region);
-
   /// Tries to perform folding on the given `op`, including unifying
   /// deduplicated constants. If successful, replaces `op`'s uses with
   /// folded results, and returns success. `preReplaceAction` is invoked on `op`
diff --git a/mlir/lib/Transforms/Utils/FoldUtils.cpp b/mlir/lib/Transforms/Utils/FoldUtils.cpp
index 5597ad6..024ae18 100644
--- a/mlir/lib/Transforms/Utils/FoldUtils.cpp
+++ b/mlir/lib/Transforms/Utils/FoldUtils.cpp
@@ -84,81 +84,6 @@ static Operation *materializeConstant(Dialect *dialect, OpBuilder &builder,
 // OperationFolder
 //===----------------------------------------------------------------------===//
 
-/// Scan the specified region for constants that can be used in folding,
-/// moving them to the entry block and adding them to our known-constants
-/// table.
-void OperationFolder::processExistingConstants(Region &region) {
-  if (region.empty())
-    return;
-
-  // March the constant insertion point forward, moving all constants to the
-  // top of the block, but keeping them in their order of discovery.
-  Region *insertRegion = getInsertionRegion(interfaces, &region.front());
-  auto &uniquedConstants = foldScopes[insertRegion];
-
-  Block &insertBlock = insertRegion->front();
-  Block::iterator constantIterator = insertBlock.begin();
-
-  // Process each constant that we discover in this region.
-  auto processConstant = [&](Operation *op, Attribute value) {
-    // Check to see if we already have an instance of this constant.
-    Operation *&constOp = uniquedConstants[std::make_tuple(
-        op->getDialect(), value, op->getResult(0).getType())];
-
-    // If we already have an instance of this constant, CSE/delete this one as
-    // we go.
-    if (constOp) {
-      if (constantIterator == Block::iterator(op))
-        ++constantIterator; // Don't invalidate our iterator when scanning.
-      op->getResult(0).replaceAllUsesWith(constOp->getResult(0));
-      op->erase();
-      return;
-    }
-
-    // Otherwise, remember that we have this constant.
-    constOp = op;
-    referencedDialects[op].push_back(op->getDialect());
-
-    // If the constant isn't already at the insertion point then move it up.
-    if (constantIterator == insertBlock.end() || &*constantIterator != op)
-      op->moveBefore(&insertBlock, constantIterator);
-    else
-      ++constantIterator; // It was pointing at the constant.
-  };
-
-  SmallVector<Operation *> isolatedOps;
-  region.walk<WalkOrder::PreOrder>([&](Operation *op) {
-    // If this is a constant, process it.
-    Attribute value;
-    if (matchPattern(op, m_Constant(&value))) {
-      processConstant(op, value);
-      // We may have deleted the operation, don't check it for regions.
-      return WalkResult::advance();
-    }
-
-    // If the operation has regions and is isolated, don't recurse into it.
-    if (op->getNumRegions() != 0) {
-      auto hasDifferentInsertRegion = [&](Region &region) {
-        return !region.empty() &&
-               getInsertionRegion(interfaces, &region.front()) != insertRegion;
-      };
-      if (llvm::any_of(op->getRegions(), hasDifferentInsertRegion)) {
-        isolatedOps.push_back(op);
-        return WalkResult::skip();
-      }
-    }
-
-    // Otherwise keep going.
-    return WalkResult::advance();
-  });
-
-  // Process regions in any isolated ops separately.
-  for (Operation *isolated : isolatedOps) {
-    for (Region &region : isolated->getRegions())
-      processExistingConstants(region);
-  }
-}
-
 LogicalResult OperationFolder::tryToFold(
     Operation *op, function_ref<void(Operation *)> processGeneratedConstants,
     function_ref<void(Operation *)> preReplaceAction, bool *inPlaceUpdate) {
@@ -337,19 +262,19 @@ Operation *OperationFolder::tryGetOrCreateConstant(
     Attribute value, Type type, Location loc) {
   // Check if an existing mapping already exists.
   auto constKey = std::make_tuple(dialect, value, type);
-  auto *&constOp = uniquedConstants[constKey];
-  if (constOp)
-    return constOp;
+  auto *&constInst = uniquedConstants[constKey];
+  if (constInst)
+    return constInst;
 
   // If one doesn't exist, try to materialize one.
-  if (!(constOp = materializeConstant(dialect, builder, value, type, loc)))
+  if (!(constInst = materializeConstant(dialect, builder, value, type, loc)))
     return nullptr;
 
   // Check to see if the generated constant is in the expected dialect.
-  auto *newDialect = constOp->getDialect();
+  auto *newDialect = constInst->getDialect();
   if (newDialect == dialect) {
-    referencedDialects[constOp].push_back(dialect);
-    return constOp;
+    referencedDialects[constInst].push_back(dialect);
+    return constInst;
   }
 
   // If it isn't, then we also need to make sure that the mapping for the new
@@ -359,13 +284,13 @@ Operation *OperationFolder::tryGetOrCreateConstant(
   // If an existing operation in the new dialect already exists, delete the
   // materialized operation in favor of the existing one.
   if (auto *existingOp = uniquedConstants.lookup(newKey)) {
-    constOp->erase();
+    constInst->erase();
     referencedDialects[existingOp].push_back(dialect);
-    return constOp = existingOp;
+    return constInst = existingOp;
   }
 
   // Otherwise, update the new dialect to the materialized operation.
-  referencedDialects[constOp].assign({dialect, newDialect});
-  auto newIt = uniquedConstants.insert({newKey, constOp});
+  referencedDialects[constInst].assign({dialect, newDialect});
+  auto newIt = uniquedConstants.insert({newKey, constInst});
   return newIt.first->second;
 }
diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
index 96e477d..9ed3b35 100644
--- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
+++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
@@ -107,8 +107,7 @@ private:
   // be re-added to the worklist. This function should be called when an
   // operation is modified or removed, as it may trigger further
   // simplifications.
-  template <typename Operands>
-  void addToWorklist(Operands &&operands) {
+  template <typename Operands> void addToWorklist(Operands &&operands) {
     for (Value operand : operands) {
       // If the use count of this operand is now < 2, we re-add the defining
       // operation to the worklist.
@@ -141,26 +140,15 @@ private:
 /// if the rewrite converges in `maxIterations`.
 bool GreedyPatternRewriteDriver::simplify(MutableArrayRef<Region> regions,
                                           int maxIterations) {
-  // Perform a prepass over the IR to discover constants.
-  for (auto &region : regions)
-    folder.processExistingConstants(region);
+  // Add the given operation to the worklist.
+  auto collectOps = [this](Operation *op) { addToWorklist(op); };
 
   bool changed = false;
-  int iteration = 0;
+  int i = 0;
   do {
-    assert(worklist.empty() &&
-           "Each iteration should start with empty worklist");
-
-    // Add all nested operations to the worklist in preorder.
+    // Add all nested operations to the worklist.
     for (auto &region : regions)
-      region.walk<WalkOrder::PreOrder>(
-          [this](Operation *op) { worklist.push_back(op); });
-
-    // Reverse the list so our pop-back loop processes them in-order.
-    std::reverse(worklist.begin(), worklist.end());
-    // Remember the reverse index.
-    for (unsigned i = 0, e = worklist.size(); i != e; ++i)
-      worklistMap[worklist[i]] = i;
+      region.walk(collectOps);
 
     // These are scratch vectors used in the folding loop below.
     SmallVector<Value, 8> originalOperands, resultValues;
@@ -198,9 +186,6 @@ bool GreedyPatternRewriteDriver::simplify(MutableArrayRef<Region> regions,
         notifyOperationRemoved(op);
       };
 
-      // Add the given operation to the worklist.
-      auto collectOps = [this](Operation *op) { addToWorklist(op); };
-
       // Try to fold this op.
       bool inPlaceUpdate;
       if ((succeeded(folder.tryToFold(op, collectOps, preReplaceAction,
@@ -221,7 +206,7 @@ bool GreedyPatternRewriteDriver::simplify(MutableArrayRef<Region> regions,
       folder.clear();
       changed = true;
     }
-  } while (changed && ++iteration < maxIterations);
+  } while (changed && ++i < maxIterations);
   // Whether the rewrite converges, i.e. wasn't changed in the last iteration.
   return !changed;
 }
diff --git a/mlir/test/Conversion/StandardToSPIRV/legalization.mlir b/mlir/test/Conversion/StandardToSPIRV/legalization.mlir
index 213762b..c5c5961 100644
--- a/mlir/test/Conversion/StandardToSPIRV/legalization.mlir
+++ b/mlir/test/Conversion/StandardToSPIRV/legalization.mlir
@@ -67,9 +67,9 @@ func @fold_dynamic_stride_subview_with_store(%arg0 : memref<12x32xf32>, %arg1 :
 // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index
 func @fold_static_stride_subview_with_transfer_read(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) -> vector<4xf32> {
   // CHECK-NOT: subview
+  // CHECK: [[F1:%.*]] = constant 1.000000e+00 : f32
   // CHECK: [[C2:%.*]] = constant 2 : index
   // CHECK: [[C3:%.*]] = constant 3 : index
-  // CHECK: [[F1:%.*]] = constant 1.000000e+00 : f32
   // CHECK: [[STRIDE1:%.*]] = muli [[ARG3]], [[C2]] : index
   // CHECK: [[INDEX1:%.*]] = addi [[ARG1]], [[STRIDE1]] : index
   // CHECK: [[STRIDE2:%.*]] = muli [[ARG4]], [[C3]] : index
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
index f6ee80f..7f69638 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
@@ -200,11 +200,12 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
 //  FULL-UNROLL-SAME:   %[[base:[a-zA-Z0-9]+]]: index
 
 func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x15xf32> {
+  // CHECK: %[[cst:.*]] = constant 7.000000e+00 : f32
   %f7 = constant 7.0: f32
-  // CHECK-DAG: %[[C0:.*]] = constant 0 : index
+
   // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32>
   // CHECK-DAG: %[[alloc:.*]] = alloca() : memref<3xvector<15xf32>>
-  // CHECK: %[[cst:.*]] = constant 7.000000e+00 : f32
+  // CHECK-DAG: %[[C0:.*]] = constant 0 : index
   // CHECK-DAG: %[[dim:.*]] = dim %[[A]], %[[C0]] : memref<?x?xf32>
   // CHECK: affine.for %[[I:.*]] = 0 to 3 {
   // CHECK:   %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
@@ -218,10 +219,10 @@ func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x
   // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
   // CHECK: %[[cst:.*]] = load %[[vmemref]][] : memref<vector<3x15xf32>>
 
+  // FULL-UNROLL: %[[pad:.*]] = constant 7.000000e+00 : f32
   // FULL-UNROLL: %[[VEC0:.*]] = constant dense<7.000000e+00> : vector<3x15xf32>
   // FULL-UNROLL: %[[C0:.*]] = constant 0 : index
   // FULL-UNROLL: %[[SPLAT:.*]] = constant dense<7.000000e+00> : vector<15xf32>
-  // FULL-UNROLL: %[[pad:.*]] = constant 7.000000e+00 : f32
   // FULL-UNROLL: %[[DIM:.*]] = dim %[[A]], %[[C0]] : memref<?x?xf32>
   // FULL-UNROLL: cmpi slt, %[[base]], %[[DIM]] : index
   // FULL-UNROLL: %[[VEC1:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
@@ -379,11 +380,11 @@ func @transfer_read_minor_identity(%A : memref<?x?x?x?xf32>) -> vector<3x3xf32>
 
 // CHECK-LABEL: transfer_read_minor_identity(
 //  CHECK-SAME:   %[[A:.*]]: memref<?x?x?x?xf32>) -> vector<3x3xf32>
+//       CHECK:   %[[c0:.*]] = constant 0 : index
+//       CHECK:   %[[cst:.*]] = constant 0.000000e+00 : f32
 //       CHECK:   %[[c2:.*]] = constant 2 : index
 //       CHECK:   %[[cst0:.*]] = constant dense<0.000000e+00> : vector<3xf32>
 //       CHECK:   %[[m:.*]] = alloca() : memref<3xvector<3xf32>>
-//       CHECK:   %[[c0:.*]] = constant 0 : index
-//       CHECK:   %[[cst:.*]] = constant 0.000000e+00 : f32
 //       CHECK:   %[[d:.*]] = dim %[[A]], %[[c2]] : memref<?x?x?x?xf32>
 //       CHECK:   affine.for %[[arg1:.*]] = 0 to 3 {
 //       CHECK:      %[[cmp:.*]] = cmpi slt, %[[arg1]], %[[d]] : index
@@ -410,9 +411,9 @@ func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref<?x?x?x?xf3
 // CHECK-LABEL: transfer_write_minor_identity(
 //  CHECK-SAME:   %[[A:.*]]: vector<3x3xf32>,
 //  CHECK-SAME:   %[[B:.*]]: memref<?x?x?x?xf32>)
+//       CHECK:   %[[c0:.*]] = constant 0 : index
 //       CHECK:   %[[c2:.*]] = constant 2 : index
 //       CHECK:   %[[m:.*]] = alloca() : memref<3xvector<3xf32>>
-//       CHECK:   %[[c0:.*]] = constant 0 : index
 //       CHECK:   %[[cast:.*]] = vector.type_cast %[[m]] : memref<3xvector<3xf32>> to memref<vector<3x3xf32>>
 //       CHECK:   store %[[A]], %[[cast]][] : memref<vector<3x3xf32>>
 //       CHECK:   %[[d:.*]] = dim %[[B]], %[[c2]] : memref<?x?x?x?xf32>
diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir
index d0bece7..352066a 100644
--- a/mlir/test/Dialect/Affine/canonicalize.mlir
+++ b/mlir/test/Dialect/Affine/canonicalize.mlir
@@ -207,7 +207,7 @@ func @compose_affine_maps_diamond_dependency(%arg0: f32, %arg1: memref<4x4xf32>)
 
 // -----
 
-// CHECK-DAG: #[[$MAP14:.*]] = affine_map<()[s0, s1] -> ((s0 * 4 + s1 * 4) floordiv s0)>
+// CHECK-DAG: #[[$MAP14:.*]] = affine_map<()[s0, s1] -> (((s1 + s0) * 4) floordiv s0)>
 
 // CHECK-LABEL: func @compose_affine_maps_multiple_symbols
 func @compose_affine_maps_multiple_symbols(%arg0: index, %arg1: index) -> index {
@@ -312,7 +312,7 @@ func @symbolic_composition_c(%arg0: index, %arg1: index, %arg2: index, %arg3: in
 
 // -----
 
-// CHECK-DAG: #[[$MAP_symbolic_composition_d:.*]] = affine_map<()[s0, s1] -> (s0 * 3 + s1)>
+// CHECK-DAG: #[[$MAP_symbolic_composition_d:.*]] = affine_map<()[s0, s1] -> (s0 + s1 * 3)>
 
 // CHECK-LABEL: func @symbolic_composition_d(
 //  CHECK-SAME:   %[[ARG0:[0-9a-zA-Z]+]]: index
@@ -321,7 +321,7 @@ func @symbolic_composition_d(%arg0: index, %arg1: index, %arg2: index, %arg3: in
   %0 = affine.apply affine_map<(d0) -> (d0)>(%arg0)
   %1 = affine.apply affine_map<()[s0] -> (s0)>()[%arg1]
   %2 = affine.apply affine_map<()[s0, s1, s2, s3] -> (s0 + s1 + s2 + s3)>()[%0, %0, %0, %1]
-  // CHECK: %{{.*}} = affine.apply #[[$MAP_symbolic_composition_d]]()[%[[ARG0]], %[[ARG1]]]
+  // CHECK: %{{.*}} = affine.apply #[[$MAP_symbolic_composition_d]]()[%[[ARG1]], %[[ARG0]]]
   return %2 : index
 }
 
diff --git a/mlir/test/Dialect/Linalg/sparse_2d.mlir b/mlir/test/Dialect/Linalg/sparse_2d.mlir
index 9450f73..d560e04 100644
--- a/mlir/test/Dialect/Linalg/sparse_2d.mlir
+++ b/mlir/test/Dialect/Linalg/sparse_2d.mlir
@@ -1163,9 +1163,9 @@ func @sum_reduction(%arga: tensor<10x20xf32>, %argx: tensor<f32>) -> tensor<f32>
 // CHECK-LABEL:   func @scale(
 // CHECK-SAME:                %[[VAL_0:.*]]: tensor<?x?xf64>,
 // CHECK-SAME:                %[[VAL_1:.*]]: tensor<?x?xf64>) -> tensor<?x?xf64> {
+// CHECK:           %[[VAL_2:.*]] = constant 2.000000e+00 : f64
 // CHECK:           %[[VAL_3:.*]] = constant 0 : index
 // CHECK:           %[[VAL_4:.*]] = constant 1 : index
-// CHECK:           %[[VAL_2:.*]] = constant 2.000000e+00 : f64
 // CHECK:           %[[VAL_5:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf64> to memref<?xindex>
 // CHECK:           %[[VAL_6:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf64> to memref<?xindex>
 // CHECK:           %[[VAL_7:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<?x?xf64> to memref<?xf64>
diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir
index cf8ba3c..32d2e01 100644
--- a/mlir/test/Dialect/Linalg/transform-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir
@@ -336,7 +336,7 @@ func @aligned_promote_fill(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>) {
   return
 }
 // CHECK-LABEL: func @aligned_promote_fill
-// CHECK:	  %[[cf:.*]] = constant 1.0{{.*}} : f32
+// CHECK:	  %[[cf:.*]] = constant {{.*}} : f32
 // CHECK:         %[[s0:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref<?x?xf32, #map{{.*}}> to memref<?x?xf32, #map{{.*}}>
 // CHECK:         %[[a0:.*]] = alloc({{%.*}}) {alignment = 32 : i64} : memref<?xi8>
 // CHECK:         %[[v0:.*]] = std.view %[[a0]][{{.*}}][{{%.*}}, {{%.*}}] : memref<?xi8> to memref<?x?xf32>
diff --git a/mlir/test/Dialect/Quant/convert-const.mlir b/mlir/test/Dialect/Quant/convert-const.mlir
index 19b2bba..bb8f8cf 100644
--- a/mlir/test/Dialect/Quant/convert-const.mlir
+++ b/mlir/test/Dialect/Quant/convert-const.mlir
@@ -144,9 +144,9 @@ func @const_custom_storage_range_i8_fixedpoint() -> tensor<7xf32> {
 // CHECK-LABEL: zero_tensors_to_zero_points
 func @zero_tensors_to_zero_points() -> (tensor<7xf32>, tensor<7xf32>, tensor<7xf32>, tensor<7xf32>) {
 
-// CHECK: %[[cst1:.*]] = constant dense<1> : tensor<7xi8>
 // CHECK: %[[cst:.*]] = constant dense<-127> : tensor<7xi8>
 // CHECK: %[[cst0:.*]] = constant dense<0> : tensor<7xi8>
+// CHECK: %[[cst1:.*]] = constant dense<1> : tensor<7xi8>
 // CHECK: "quant.scast"(%[[cst0]]) : (tensor<7xi8>) -> tensor<7x!quant.uniform<i8:f32, 1.000000e+00>>
 // CHECK: "quant.scast"(%[[cst]]) : (tensor<7xi8>) -> tensor<7x!quant.uniform<i8<-127:127>:f32, 1.000000e+00:-127>>
 // CHECK: "quant.scast"(%[[cst0]]) : (tensor<7xi8>) -> tensor<7x!quant.uniform<u8:f32, 1.000000e+00>>
@@ -176,10 +176,10 @@ func @zero_tensors_to_zero_points() -> (tensor<7xf32>, tensor<7xf32>, tensor<7xf
 // CHECK-LABEL: per_axis_dense_quantization
 func @per_axis_dense_quantization() -> (tensor<2x3xf32>, tensor<2x3xf32>) {
 
-// CHECK-NEXT: %[[cst0:.*]] = constant dense<{{\[}}[-128, -1, 1], [127, 1, 3]]> : tensor<2x3xi8>
 // CHECK-NEXT: %[[cst:.*]] = constant dense<{{\[}}[-128, 64, 127], [0, 1, 2]]> : tensor<2x3xi8>
+// CHECK-NEXT: %[[cst0:.*]] = constant dense<{{\[}}[-128, -1, 1], [127, 1, 3]]> : tensor<2x3xi8>
 // CHECK: "quant.scast"(%[[cst]]) : (tensor<2x3xi8>) -> tensor<2x3x!quant.uniform<i8:f32:0, {7.812500e-03:128,1.000000e+00}>>
-// CHECK: "quant.scast"(%[[cst0]]) : (tensor<2x3xi8>) -> tensor<2x3x!quant.uniform<i8:f32:1, {7.812500e-03:128,1.000000e+00,1.000000e+00:1}>>
+// CHECK: "quant.scast"(%[[cst0]]) : (tensor<2x3xi8>) -> tensor<2x3x!quant.uniform<i8:f32:1, {7.812500e-03:128,1.000000e+00,1.000000e+00:1}>>
 
   %cst = constant dense<[[-2.0, -0.5, 0.0], [0.0, 1.0, 2.0]]> : tensor<2x3xf32>
   %1 = "quant.qcast"(%cst) : (tensor<2x3xf32>) -> tensor<2x3x!quant.uniform<i8:f32:0, {7.812500e-03:128, 1.0}>>
diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir
index 7405b01..0d7c4ee 100644
--- a/mlir/test/Dialect/SCF/canonicalize.mlir
+++ b/mlir/test/Dialect/SCF/canonicalize.mlir
@@ -21,12 +21,12 @@ func @single_iteration(%A: memref<?x?x?xi32>) {
 
 // CHECK-LABEL:   func @single_iteration(
 // CHECK-SAME:                        [[ARG0:%.*]]: memref<?x?x?xi32>) {
-// CHECK:           [[C42:%.*]] = constant 42 : i32
-// CHECK:           [[C7:%.*]] = constant 7 : index
-// CHECK:           [[C6:%.*]] = constant 6 : index
-// CHECK:           [[C3:%.*]] = constant 3 : index
-// CHECK:           [[C2:%.*]] = constant 2 : index
 // CHECK:           [[C0:%.*]] = constant 0 : index
+// CHECK:           [[C2:%.*]] = constant 2 : index
+// CHECK:           [[C3:%.*]] = constant 3 : index
+// CHECK:           [[C6:%.*]] = constant 6 : index
+// CHECK:           [[C7:%.*]] = constant 7 : index
+// CHECK:           [[C42:%.*]] = constant 42 : i32
 // CHECK:           scf.parallel ([[V0:%.*]]) = ([[C3]]) to ([[C6]]) step ([[C2]]) {
 // CHECK:             store [[C42]], [[ARG0]]{{\[}}[[C0]], [[V0]], [[C7]]] : memref<?x?x?xi32>
 // CHECK:             scf.yield
diff --git a/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir b/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir
index 2335c4d..cc1db79 100644
--- a/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir
+++ b/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir
@@ -92,9 +92,9 @@ func @convert_bitcast_multi_use(%arg0 : vector<2xf32>, %arg1 : !spv.ptr<i64, Uni
 
 // CHECK-LABEL: extract_vector
 func @extract_vector() -> (i32, i32, i32) {
-  // CHECK: spv.Constant 6 : i32
-  // CHECK: spv.Constant -33 : i32
   // CHECK: spv.Constant 42 : i32
+  // CHECK: spv.Constant -33 : i32
+  // CHECK: spv.Constant 6 : i32
   %0 = spv.Constant dense<[42, -33, 6]> : vector<3xi32>
   %1 = spv.CompositeExtract %0[0 : i32] : vector<3xi32>
   %2 = spv.CompositeExtract %0[1 : i32] : vector<3xi32>
@@ -106,8 +106,8 @@ func @extract_vector() -> (i32, i32, i32) {
 
 // CHECK-LABEL: extract_array_final
 func @extract_array_final() -> (i32, i32) {
-  // CHECK: spv.Constant -5 : i32
   // CHECK: spv.Constant 4 : i32
+  // CHECK: spv.Constant -5 : i32
   %0 = spv.Constant [dense<[4, -5]> : vector<2xi32>] : !spv.array<1 x vector<2xi32>>
   %1 = spv.CompositeExtract %0[0 : i32, 0 : i32] : !spv.array<1 x vector<2 x i32>>
   %2 = spv.CompositeExtract %0[0 : i32, 1 : i32] : !spv.array<1 x vector<2 x i32>>
@@ -192,9 +192,9 @@ func @const_fold_scalar_iadd_normal() -> (i32, i32, i32) {
   %c5 = spv.Constant 5 : i32
   %cn8 = spv.Constant -8 : i32
 
-  // CHECK: spv.Constant -3
-  // CHECK: spv.Constant -16
   // CHECK: spv.Constant 10
+  // CHECK: spv.Constant -16
+  // CHECK: spv.Constant -3
   %0 = spv.IAdd %c5, %c5 : i32
   %1 = spv.IAdd %cn8, %cn8 : i32
   %2 = spv.IAdd %c5, %cn8 : i32
@@ -210,17 +210,17 @@ func @const_fold_scalar_iadd_flow() -> (i32, i32, i32, i32) {
   %c5 = spv.Constant -1 : i32          //         : 0xffff ffff
   %c6 = spv.Constant -2 : i32          //         : 0xffff fffe
 
-  // 0x8000 0000 + 0xffff fffe = 0x1 7fff fffe -> 0x7fff fffe
-  // CHECK: spv.Constant 2147483646
-  // 0x8000 0000 + 0xffff ffff = 0x1 7fff ffff -> 0x7fff ffff
-  // CHECK: spv.Constant 2147483647
-  // 0x0000 0002 + 0xffff ffff = 0x1 0000 0001 -> 0x0000 0001
-  // CHECK: spv.Constant 1
   // 0x0000 0001 + 0xffff ffff = 0x1 0000 0000 -> 0x0000 0000
   // CHECK: spv.Constant 0
   %0 = spv.IAdd %c1, %c3 : i32
-   %1 = spv.IAdd %c2, %c3 : i32
+  // 0x0000 0002 + 0xffff ffff = 0x1 0000 0001 -> 0x0000 0001
+  // CHECK: spv.Constant 1
+  %1 = spv.IAdd %c2, %c3 : i32
+  // 0x8000 0000 + 0xffff ffff = 0x1 7fff ffff -> 0x7fff ffff
+  // CHECK: spv.Constant 2147483647
   %2 = spv.IAdd %c4, %c5 : i32
+  // 0x8000 0000 + 0xffff fffe = 0x1 7fff fffe -> 0x7fff fffe
+  // CHECK: spv.Constant 2147483646
   %3 = spv.IAdd %c4, %c6 : i32
   return %0, %1, %2, %3: i32, i32, i32, i32
 }
@@ -259,9 +259,9 @@ func @const_fold_scalar_imul_normal() -> (i32, i32, i32) {
   %cn8 = spv.Constant -8 : i32
   %c7 = spv.Constant 7 : i32
 
-  // CHECK: spv.Constant -56
-  // CHECK: spv.Constant -40
   // CHECK: spv.Constant 35
+  // CHECK: spv.Constant -40
+  // CHECK: spv.Constant -56
   %0 = spv.IMul %c7, %c5 : i32
   %1 = spv.IMul %c5, %cn8 : i32
   %2 = spv.IMul %cn8, %c7 : i32
@@ -275,14 +275,13 @@ func @const_fold_scalar_imul_flow() -> (i32, i32, i32) {
   %c3 = spv.Constant 4294967295 : i32  // 2^32 - 1 : 0xffff ffff
   %c4 = spv.Constant 2147483647 : i32  // 2^31 - 1 : 0x7fff ffff
 
-  // (0x7fff ffff << 2) = 0x1 ffff fffc -> 0xffff fffc
-  // CHECK: %[[CST4:.*]] = spv.Constant -4
-
   // (0xffff ffff << 1) = 0x1 ffff fffe -> 0xffff fffe
   // CHECK: %[[CST2:.*]] = spv.Constant -2
   %0 = spv.IMul %c1, %c3 : i32
   // (0x7fff ffff << 1) = 0x0 ffff fffe -> 0xffff fffe
   %1 = spv.IMul %c1, %c4 : i32
+  // (0x7fff ffff << 2) = 0x1 ffff fffc -> 0xffff fffc
+  // CHECK: %[[CST4:.*]] = spv.Constant -4
   %2 = spv.IMul %c4, %c2 : i32
   // CHECK: return %[[CST2]], %[[CST2]], %[[CST4]]
   return %0, %1, %2: i32, i32, i32
@@ -318,9 +317,9 @@ func @const_fold_scalar_isub_normal() -> (i32, i32, i32) {
   %cn8 = spv.Constant -8 : i32
   %c7 = spv.Constant 7 : i32
 
-  // CHECK: spv.Constant -15
-  // CHECK: spv.Constant 13
   // CHECK: spv.Constant 2
+  // CHECK: spv.Constant 13
+  // CHECK: spv.Constant -15
   %0 = spv.ISub %c7, %c5 : i32
   %1 = spv.ISub %c5, %cn8 : i32
   %2 = spv.ISub %cn8, %c7 : i32
@@ -336,17 +335,17 @@ func @const_fold_scalar_isub_flow() -> (i32, i32, i32, i32) {
   %c5 = spv.Constant -1 : i32          //          : 0xffff ffff
   %c6 = spv.Constant -2 : i32          //          : 0xffff fffe
 
-  // 0xffff ffff - 0x7fff ffff -> 0xffff ffff + 0x8000 0001 = 0x1 8000 0000
-  // CHECK: spv.Constant -2147483648
-  // 0x0000 0001 - 0xffff ffff -> 0x0000 0001 + 0x0000 0001 = 0x0000 0002
-  // CHECK: spv.Constant 2
   // 0x0000 0000 - 0xffff ffff -> 0x0000 0000 + 0x0000 0001 = 0x0000 0001
   // CHECK: spv.Constant 1
-  // 0xffff fffe - 0x7fff ffff -> 0xffff fffe + 0x8000 0001 = 0x1 7fff ffff
-  // CHECK: spv.Constant 2147483647
   %0 = spv.ISub %c1, %c3 : i32
+  // 0x0000 0001 - 0xffff ffff -> 0x0000 0001 + 0x0000 0001 = 0x0000 0002
+  // CHECK: spv.Constant 2
   %1 = spv.ISub %c2, %c3 : i32
+  // 0xffff ffff - 0x7fff ffff -> 0xffff ffff + 0x8000 0001 = 0x1 8000 0000
+  // CHECK: spv.Constant -2147483648
   %2 = spv.ISub %c5, %c4 : i32
+  // 0xffff fffe - 0x7fff ffff -> 0xffff fffe + 0x8000 0001 = 0x1 7fff ffff
+  // CHECK: spv.Constant 2147483647
   %3 = spv.ISub %c6, %c4 : i32
   return %0, %1, %2, %3: i32, i32, i32, i32
 }
@@ -546,14 +545,12 @@ func @canonicalize_selection_op_vector_type(%cond: i1) -> () {
 
 // -----
 
-// CHECK-LABEL: cannot_canonicalize_selection_op_0
-
 // Store to a different variables.
 func @cannot_canonicalize_selection_op_0(%cond: i1) -> () {
   %0 = spv.Constant dense<[0, 1, 2]> : vector<3xi32>
-  // CHECK: %[[SRC_VALUE_1:.*]] = spv.Constant dense<[2, 3, 4]> : vector<3xi32>
   // CHECK: %[[SRC_VALUE_0:.*]] = spv.Constant dense<[1, 2, 3]> : vector<3xi32>
   %1 = spv.Constant dense<[1, 2, 3]> : vector<3xi32>
+  // CHECK: %[[SRC_VALUE_1:.*]] = spv.Constant dense<[2, 3, 4]> : vector<3xi32>
   %2 = spv.Constant dense<[2, 3, 4]> : vector<3xi32>
   // CHECK: %[[DST_VAR_0:.*]] = spv.Variable init({{%.*}}) : !spv.ptr<vector<3xi32>, Function>
   %3 = spv.Variable init(%0) : !spv.ptr<vector<3xi32>, Function>
@@ -585,8 +582,6 @@ func @cannot_canonicalize_selection_op_0(%cond: i1) -> () {
 
 // -----
 
-// CHECK-LABEL: cannot_canonicalize_selection_op_1
-
 // A conditional block consists of more than 2 operations.
 func @cannot_canonicalize_selection_op_1(%cond: i1) -> () {
   %0 = spv.Constant dense<[0, 1, 2]> : vector<3xi32>
@@ -623,8 +618,6 @@ func @cannot_canonicalize_selection_op_1(%cond: i1) -> () {
 
 // -----
 
-// CHECK-LABEL: cannot_canonicalize_selection_op_2
-
 // A control-flow goes into `^then` block from `^else` block.
 func @cannot_canonicalize_selection_op_2(%cond: i1) -> () {
   %0 = spv.Constant dense<[0, 1, 2]> : vector<3xi32>
@@ -657,13 +650,11 @@ func @cannot_canonicalize_selection_op_2(%cond: i1) -> () {
 
 // -----
 
-// CHECK-LABEL: cannot_canonicalize_selection_op_3
-
 // `spv.Return` as a block terminator.
 func @cannot_canonicalize_selection_op_3(%cond: i1) -> () {
   %0 = spv.Constant dense<[0, 1, 2]> : vector<3xi32>
-  %1 = spv.Constant dense<[1, 2, 3]> : vector<3xi32>
   // CHECK: %[[SRC_VALUE_0:.*]] = spv.Constant dense<[1, 2, 3]> : vector<3xi32>
+  %1 = spv.Constant dense<[1, 2, 3]> : vector<3xi32>
   // CHECK: %[[SRC_VALUE_1:.*]] = spv.Constant dense<[2, 3, 4]> : vector<3xi32>
   %2 = spv.Constant dense<[2, 3, 4]> : vector<3xi32>
   // CHECK: %[[DST_VAR:.*]] = spv.Variable init({{%.*}}) : !spv.ptr<vector<3xi32>, Function>
@@ -691,8 +682,6 @@ func @cannot_canonicalize_selection_op_3(%cond: i1) -> () {
 
 // -----
 
-// CHECK-LABEL: cannot_canonicalize_selection_op_4
-
 // Different memory access attributes.
 func @cannot_canonicalize_selection_op_4(%cond: i1) -> () {
   %0 = spv.Constant dense<[0, 1, 2]> : vector<3xi32>
diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir
index 9fed1ce..f975a51 100644
--- a/mlir/test/Dialect/Tensor/canonicalize.mlir
+++ b/mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -69,9 +69,6 @@ func @fold_extract(%arg0 : index) -> (f32, f16, f16, i32) {
   %const_0 = constant 0 : index
   %const_1 = constant 1 : index
   %const_3 = constant 3 : index
-  // CHECK-NEXT: [[C64:%.+]] = constant 64 : i32
-  // CHECK-NEXT: [[C0:%.+]] = constant 0.{{0*}}e+00 : f16
-  // CHECK-NEXT: [[CM2:%.+]] = constant -2.{{0*}}e+00 : f16
 
   // Fold an extract into a splat.
   // CHECK-NEXT: [[C4:%.+]] = constant 4.{{0*}}e+00 : f32
@@ -79,15 +76,18 @@ func @fold_extract(%arg0 : index) -> (f32, f16, f16, i32) {
   %ext_1 = tensor.extract %0[%arg0] : tensor<4xf32>
 
   // Fold an extract into a sparse with a sparse index.
+  // CHECK-NEXT: [[CM2:%.+]] = constant -2.{{0*}}e+00 : f16
   %1 = constant sparse<[[0, 0, 0], [1, 1, 1]],  [-5.0, -2.0]> : tensor<4x4x4xf16>
   %ext_2 = tensor.extract %1[%const_1, %const_1, %const_1] : tensor<4x4x4xf16>
 
   // Fold an extract into a sparse with a non sparse index.
+  // CHECK-NEXT: [[C0:%.+]] = constant 0.{{0*}}e+00 : f16
   %2 = constant sparse<[[1, 1, 1]],  [-2.0]> : tensor<1x1x1xf16>
   %ext_3 = tensor.extract %2[%const_0, %const_0, %const_0] : tensor<1x1x1xf16>
 
   // Fold an extract into a dense tensor.
-   %3 = constant dense<[[[1, -2, 1, 36]], [[0, 2, -1, 64]]]> : tensor<2x1x4xi32>
+  // CHECK-NEXT: [[C64:%.+]] = constant 64 : i32
+  %3 = constant dense<[[[1, -2, 1, 36]], [[0, 2, -1, 64]]]> : tensor<2x1x4xi32>
   %ext_4 = tensor.extract %3[%const_1, %const_0, %const_3] : tensor<2x1x4xi32>
 
   // CHECK-NEXT: return [[C4]], [[CM2]], [[C0]], [[C64]]
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index b68e11f..17da655 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -234,10 +234,10 @@ func @transpose_3D_sequence(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
   // CHECK: [[T0:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
   %0 = vector.transpose %arg, [1, 2, 0] : vector<4x3x2xf32> to vector<3x2x4xf32>
   %1 = vector.transpose %0, [1, 0, 2] : vector<3x2x4xf32> to vector<2x3x4xf32>
-  // CHECK: [[T1:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
+  // CHECK-NOT: transpose
   %2 = vector.transpose %1, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
   %3 = vector.transpose %2, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
-  // CHECK: [[MUL:%.*]] = mulf [[T0]], [[T1]]
+  // CHECK: [[MUL:%.*]] = mulf [[T0]], [[T0]]
   %4 = mulf %1, %3 : vector<2x3x4xf32>
   // CHECK: [[T5:%.*]] = vector.transpose [[MUL]], [2, 1, 0]
   %5 = vector.transpose %4, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
@@ -571,10 +571,10 @@ func @bitcast_folding(%I1: vector<4x8xf32>, %I2: vector<2xi32>) -> (vector<4x8xf
 }
 
 // CHECK-LABEL: func @bitcast_f16_to_f32
-//              bit pattern: 0x40004000
-//       CHECK: %[[CST1:.+]] = constant dense<2.00390625> : vector<4xf32>
 //              bit pattern: 0x00000000
 //       CHECK: %[[CST0:.+]] = constant dense<0.000000e+00> : vector<4xf32>
+//              bit pattern: 0x40004000
+//       CHECK: %[[CST1:.+]] = constant dense<2.00390625> : vector<4xf32>
 //       CHECK: return %[[CST0]], %[[CST1]]
 func @bitcast_f16_to_f32() -> (vector<4xf32>, vector<4xf32>) {
   %cst0 = constant dense<0.0> : vector<8xf16> // bit pattern: 0x0000
@@ -612,8 +612,8 @@ func @broadcast_folding2() -> vector<4x16xi32> {
 // -----
 
 // CHECK-LABEL: shape_cast_constant
-//       CHECK: %[[CST1:.*]] = constant dense<1> : vector<3x4x2xi32>
 //       CHECK: %[[CST0:.*]] = constant dense<2.000000e+00> : vector<20x2xf32>
+//       CHECK: %[[CST1:.*]] = constant dense<1> : vector<3x4x2xi32>
 //       CHECK: return %[[CST0]], %[[CST1]] : vector<20x2xf32>, vector<3x4x2xi32>
 func @shape_cast_constant() -> (vector<20x2xf32>, vector<3x4x2xi32>) {
   %cst = constant dense<2.000000e+00> : vector<5x4x2xf32>
@@ -626,8 +626,8 @@ func @shape_cast_constant() -> (vector<20x2xf32>, vector<3x4x2xi32>) {
 // -----
 
 // CHECK-LABEL: extract_strided_constant
-//       CHECK: %[[CST1:.*]] = constant dense<1> : vector<2x13x3xi32>
 //       CHECK: %[[CST0:.*]] = constant dense<2.000000e+00> : vector<12x2xf32>
+//       CHECK: %[[CST1:.*]] = constant dense<1> : vector<2x13x3xi32>
 //       CHECK: return %[[CST0]], %[[CST1]] : vector<12x2xf32>, vector<2x13x3xi32>
 func @extract_strided_constant() -> (vector<12x2xf32>, vector<2x13x3xi32>) {
   %cst = constant dense<2.000000e+00> : vector<29x7xf32>
diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir
index 827847c..3adb18c 100644
--- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir
+++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir
@@ -431,9 +431,8 @@ func @nop_shape_cast(%arg0: vector<16xf32>) -> vector<16xf32> {
 }
 
 // CHECK-LABEL: func @cancel_shape_cast
-// FIXME: PR49590
-// HECK-SAME: %[[A:.*]]: vector<16xf32>
-// HECK:      return %[[A]] : vector<16xf32>
+// CHECK-SAME: %[[A:.*]]: vector<16xf32>
+// CHECK:      return %[[A]] : vector<16xf32>
 
 func @cancel_shape_cast(%arg0: vector<16xf32>) -> vector<16xf32> {
   %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<4x4xf32>
@@ -445,8 +444,8 @@ func @cancel_shape_cast(%arg0: vector<16xf32>) -> vector<16xf32> {
 // llvm.matrix operations
 // CHECK-LABEL: func @shape_casts
 func @shape_casts(%a: vector<2x2xf32>) -> (vector<4xf32>, vector<2x2xf32>) {
-  // CHECK: %[[cst22:.*]] = constant dense<0.000000e+00> : vector<2x2xf32>
   // CHECK: %[[cst:.*]] = constant dense<0.000000e+00> : vector<4xf32>
+  // CHECK: %[[cst22:.*]] = constant dense<0.000000e+00> : vector<2x2xf32>
   // CHECK: %[[ex0:.*]] = vector.extract %{{.*}}[0] : vector<2x2xf32>
   //
   // CHECK: %[[in0:.*]] = vector.insert_strided_slice %[[ex0]], %[[cst]]
diff --git a/mlir/test/Dialect/Vector/vector-flat-transforms.mlir b/mlir/test/Dialect/Vector/vector-flat-transforms.mlir
index 8d51d32..c07d651 100644
--- a/mlir/test/Dialect/Vector/vector-flat-transforms.mlir
+++ b/mlir/test/Dialect/Vector/vector-flat-transforms.mlir
@@ -22,12 +22,10 @@ func @transpose44_44(%arg0: vector<4x4xf32>) -> vector<4x4xf32> {
 // Folds preceding shape cast as expected,
 // no following shape cast folding expected.
 //
-// FIXME: PR49590 - shape_cast not stable.
-//
 // CHECK-LABEL: func @transpose16_44(
 // CHECK-SAME:  %[[A:.*]]: vector<16xf32>
-// HECK:       %[[T0:.*]] = vector.flat_transpose %[[A]] {columns = 4 : i32, rows = 4 : i32} : vector<16xf32> -> vector<16xf32>
-// HECK:       %[[T1:.*]] = vector.extract_strided_slice %[[T0]] {offsets = [0], sizes = [4], strides = [1]} : vector<16xf32> to vector<4xf32>
+// CHECK:       %[[T0:.*]] = vector.flat_transpose %[[A]] {columns = 4 : i32, rows = 4 : i32} : vector<16xf32> -> vector<16xf32>
+// CHECK:       %[[T1:.*]] = vector.extract_strided_slice %[[T0]] {offsets = [0], sizes = [4], strides = [1]} : vector<16xf32> to vector<4xf32>
 //
 func @transpose16_44(%arg0: vector<16xf32>) -> vector<4x4xf32> {
   %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<4x4xf32>
@@ -51,11 +49,9 @@ func @transpose44_16(%arg0: vector<4x4xf32>) -> vector<16xf32> {
 // Folds preceding shape cast as expected,
 // but FAILS to fold following cast.
 //
-// FIXME: PR49590 - shape_cast not stable.
-//
 // CHECK-LABEL: func @transpose16_16(
 // CHECK-SAME:  %[[A:.*]]: vector<16xf32>
-// HECK:       %[[T0:.*]] = vector.flat_transpose %[[A]] {columns = 4 : i32, rows = 4 : i32} : vector<16xf32> -> vector<16xf32>
+// CHECK:       %[[T0:.*]] = vector.flat_transpose %[[A]] {columns = 4 : i32, rows = 4 : i32} : vector<16xf32> -> vector<16xf32>
 //
 func @transpose16_16(%arg0: vector<16xf32>) -> vector<16xf32> {
   %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<4x4xf32>
diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
index 5c006fe..4757b83 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
@@ -25,11 +25,11 @@ func @split_vector_transfer_read_2d(%A: memref<?x8xf32>, %i: index, %j: index) -
   %c0 = constant 0 : index
   %f0 = constant 0.0 : f32
 
-  //  CHECK-DAG: %[[c8:.*]] = constant 8 : index
   //  CHECK-DAG: %[[c0:.*]] = constant 0 : index
+  //  CHECK-DAG: %[[c8:.*]] = constant 8 : index
+  //  CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32
   // alloca for boundary full tile
   //      CHECK: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32>
-  //  CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32
   // %i + 4 <= dim(%A, 0)
   //      CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
   //      CHECK: %[[d0:.*]] = dim %[[A]], %[[c0]] : memref<?x8xf32>
@@ -60,9 +60,9 @@ func @split_vector_transfer_read_2d(%A: memref<?x8xf32>, %i: index, %j: index) -
   //  LINALG-DAG: %[[c0:.*]] = constant 0 : index
   //  LINALG-DAG: %[[c4:.*]] = constant 4 : index
   //  LINALG-DAG: %[[c8:.*]] = constant 8 : index
+  //  LINALG-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32
   // alloca for boundary full tile
   //      LINALG: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32>
-  //  LINALG-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32
   // %i + 4 <= dim(%A, 0)
   //      LINALG: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
   //      LINALG: %[[d0:.*]] = dim %[[A]], %[[c0]] : memref<?x8xf32>
@@ -112,12 +112,12 @@ func @split_vector_transfer_read_strided_2d(
   %c0 = constant 0 : index
   %f0 = constant 0.0 : f32
 
+  //  CHECK-DAG: %[[c0:.*]] = constant 0 : index
   //  CHECK-DAG: %[[c7:.*]] = constant 7 : index
   //  CHECK-DAG: %[[c8:.*]] = constant 8 : index
-  //  CHECK-DAG: %[[c0:.*]] = constant 0 : index
+  //  CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32
   // alloca for boundary full tile
   //      CHECK: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32>
-  //  CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32
   // %i + 4 <= dim(%A, 0)
   //      CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
   //      CHECK: %[[cmp0:.*]] = cmpi sle, %[[idx0]], %[[c7]] : index
@@ -152,9 +152,9 @@ func @split_vector_transfer_read_strided_2d(
   //  LINALG-DAG: %[[c4:.*]] = constant 4 : index
   //  LINALG-DAG: %[[c7:.*]] = constant 7 : index
   //  LINALG-DAG: %[[c8:.*]] = constant 8 : index
+  //  LINALG-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32
   // alloca for boundary full tile
   //      LINALG: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32>
-  //  LINALG-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32
   // %i + 4 <= dim(%A, 0)
   //      LINALG: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
   //      LINALG: %[[cmp0:.*]] = cmpi sle, %[[idx0]], %[[c7]] : index
diff --git a/mlir/test/Dialect/Vector/vector-transfer-unroll.mlir b/mlir/test/Dialect/Vector/vector-transfer-unroll.mlir
index 4ebf29c..d5e9535 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-unroll.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-unroll.mlir
@@ -1,8 +1,8 @@
 // RUN: mlir-opt %s -test-vector-transfer-unrolling-patterns | FileCheck %s
 
 // CHECK-LABEL: func @transfer_read_unroll
-//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[C0:.*]] = constant 0 : index
+//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
 //  CHECK-NEXT:   %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
 //  CHECK-NEXT:   %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
@@ -19,8 +19,8 @@ func @transfer_read_unroll(%arg0 : memref<4x4xf32>) -> vector<4x4xf32> {
 }
 
 // CHECK-LABEL: func @transfer_write_unroll
-//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[C0:.*]] = constant 0 : index
+//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[TUPL:.*]] = vector.extract_slices {{.*}}, [2, 2], [1, 1] : vector<4x4xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>, vector<2x2xf32>, vector<2x2xf32>>
 //  CHECK-NEXT:   %[[T0:.*]] = vector.tuple_get %[[TUPL]], 0 : tuple<vector<2x2xf32>, vector<2x2xf32>, vector<2x2xf32>, vector<2x2xf32>>
 //  CHECK-NEXT:   vector.transfer_write %[[T0]], {{.*}}[%[[C0]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
@@ -39,8 +39,8 @@ func @transfer_write_unroll(%arg0 : memref<4x4xf32>, %arg1 : vector<4x4xf32>) {
 }
 
 // CHECK-LABEL: func @transfer_readwrite_unroll
-//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[C0:.*]] = constant 0 : index
+//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
 //  CHECK-NEXT:   %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
 //  CHECK-NEXT:   %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
@@ -60,8 +60,8 @@ func @transfer_readwrite_unroll(%arg0 : memref<4x4xf32>) {
 }
 
 // CHECK-LABEL: func @transfer_read_unroll_tensor
-//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[C0:.*]] = constant 0 : index
+//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
 //  CHECK-NEXT:   %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
 //  CHECK-NEXT:   %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
@@ -78,8 +78,8 @@ func @transfer_read_unroll_tensor(%arg0 : tensor<4x4xf32>) -> vector<4x4xf32> {
 }
 
 // CHECK-LABEL: func @transfer_write_unroll_tensor
-//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[C0:.*]] = constant 0 : index
+//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[TUPL:.*]] = vector.extract_slices {{.*}}, [2, 2], [1, 1] : vector<4x4xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>, vector<2x2xf32>, vector<2x2xf32>>
 //  CHECK-NEXT:   %[[T0:.*]] = vector.tuple_get %[[TUPL]], 0 : tuple<vector<2x2xf32>, vector<2x2xf32>, vector<2x2xf32>, vector<2x2xf32>>
 //  CHECK-NEXT:   %[[VTW0:.*]] = vector.transfer_write %[[T0]], {{.*}}[%[[C0]], %[[C0]]] {{.*}} : vector<2x2xf32>, tensor<4x4xf32>
@@ -100,8 +100,8 @@ func @transfer_write_unroll_tensor(%arg0 : tensor<4x4xf32>,
 }
 
 // CHECK-LABEL: func @transfer_readwrite_unroll_tensor
-//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[C0:.*]] = constant 0 : index
+//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
 //  CHECK-NEXT:   %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
 //  CHECK-NEXT:   %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
diff --git a/mlir/test/Dialect/Vector/vector-transforms.mlir b/mlir/test/Dialect/Vector/vector-transforms.mlir
index d5e15da..f57e5f6 100644
--- a/mlir/test/Dialect/Vector/vector-transforms.mlir
+++ b/mlir/test/Dialect/Vector/vector-transforms.mlir
@@ -225,8 +225,8 @@ func @contraction4x4_ikj(%arg0 : vector<4x2xf32>, %arg1 : vector<2x4xf32>,
 
 // CHECK-LABEL: func @contraction4x4_ikj_xfer_read
 
-// CHECK:      %[[C2:.*]] = constant 2 : index
 // CHECK:      %[[C0:.*]] = constant 0 : index
+// CHECK:      %[[C2:.*]] = constant 2 : index
 
 // Check LHS vector.transfer read is split for each user.
 
@@ -422,8 +422,8 @@ func @cancelling_shape_cast_ops(%arg0 : vector<2x4xf32>) -> vector<2x4xf32> {
 }
 
 // CHECK-LABEL: func @vector_transfers_vector_element_type
-//      CHECK: %[[C1:.*]] = constant 1 : index
 //      CHECK: %[[C0:.*]] = constant 0 : index
+//      CHECK: %[[C1:.*]] = constant 1 : index
 //      CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {masked = [false, false]} : memref<6x2x1xvector<2x4xf32>>, vector<1x1x2x4xf32>
 // CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C1]], %[[C0]]], %{{.*}} {masked = [false, false]} : memref<6x2x1xvector<2x4xf32>>, vector<1x1x2x4xf32>
 // CHECK-NEXT: vector.transfer_write %[[VTR0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {masked = [false, false]} : vector<1x1x2x4xf32>, memref<6x2x1xvector<2x4xf32>>
@@ -516,8 +516,8 @@ func @shape_cast_fold(%arg0 : vector<5x4x2xf32>, %arg1 : vector<3x4x2xf32>)
 
 // CHECK-LABEL: func @elementwise_unroll
 //  CHECK-SAME: (%[[ARG0:.*]]: memref<4x4xf32>, %[[ARG1:.*]]: memref<4x4xf32>)
-//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[C0:.*]] = constant 0 : index
+//       CHECK:   %[[C2:.*]] = constant 2 : index
 //       CHECK:   %[[VT0:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
 //       CHECK:   %[[VT1:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
 //       CHECK:   %[[VT2:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index aa403ae..3bc3eee 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -572,7 +572,6 @@ func @indirect_call_folding() {
 //
 // CHECK-LABEL: @lowered_affine_mod
 func @lowered_affine_mod() -> (index, index) {
-// CHECK-NEXT: {{.*}} = constant 1 : index
 // CHECK-NEXT: {{.*}} = constant 41 : index
   %c-43 = constant -43 : index
   %c42 = constant 42 : index
@@ -581,6 +580,7 @@ func @lowered_affine_mod() -> (index, index) {
   %1 = cmpi slt, %0, %c0 : index
   %2 = addi %0, %c42 : index
   %3 = select %1, %2, %0 : index
+// CHECK-NEXT: {{.*}} = constant 1 : index
   %c43 = constant 43 : index
   %c42_0 = constant 42 : index
   %4 = remi_signed %c43, %c42_0 : index
@@ -598,7 +598,6 @@ func @lowered_affine_mod() -> (index, index) {
 //
 // CHECK-LABEL: func @lowered_affine_floordiv
 func @lowered_affine_floordiv() -> (index, index) {
-// CHECK-NEXT: %c1 = constant 1 : index
 // CHECK-NEXT: %c-2 = constant -2 : index
   %c-43 = constant -43 : index
   %c42 = constant 42 : index
@@ -610,6 +609,7 @@ func @lowered_affine_floordiv() -> (index, index) {
   %3 = divi_signed %2, %c42 : index
   %4 = subi %c-1, %3 : index
   %5 = select %0, %4, %3 : index
+// CHECK-NEXT: %c1 = constant 1 : index
   %c43 = constant 43 : index
   %c42_0 = constant 42 : index
   %c0_1 = constant 0 : index
@@ -724,8 +724,7 @@ func @view(%arg0 : index) -> (f32, f32, f32, f32) {
 // CHECK-LABEL: func @subview
 // CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
 func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
-  // Folded but reappears after subview folding into dim.
-  // CHECK: %[[C11:.*]] = constant 11 : index
+  // CHECK: %[[C0:.*]] = constant 0 : index
   %c0 = constant 0 : index
   // CHECK-NOT: constant 1 : index
   %c1 = constant 1 : index
@@ -734,10 +733,11 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
   // Folded but reappears after subview folding into dim.
   // CHECK: %[[C7:.*]] = constant 7 : index
   %c7 = constant 7 : index
+  // Folded but reappears after subview folding into dim.
+  // CHECK: %[[C11:.*]] = constant 11 : index
   %c11 = constant 11 : index
   // CHECK-NOT: constant 15 : index
   %c15 = constant 15 : index
-  // CHECK: %[[C0:.*]] = constant 0 : index
 
   // CHECK: %[[ALLOC0:.*]] = alloc()
   %0 = alloc() : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]>
@@ -895,8 +895,8 @@ func @index_cast_fold() -> (i16, index) {
   %1 = index_cast %c4 : index to i16
   %c4_i16 = constant 4 : i16
   %2 = index_cast %c4_i16 : i16 to index
-  // CHECK: %[[C4:.*]] = constant 4 : index
   // CHECK: %[[C4_I16:.*]] = constant 4 : i16
+  // CHECK: %[[C4:.*]] = constant 4 : index
   // CHECK: return %[[C4_I16]], %[[C4]] : i16, index
   return %1, %2 : i16, index
 }
diff --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir
index 7ce5110..2bd78be 100644
--- a/mlir/test/Transforms/parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir
@@ -28,15 +28,15 @@ func @parallel_many_dims() {
   return
 }
 
-// CHECK: [[C12:%.*]] = constant 12 : index
-// CHECK: [[C10:%.*]] = constant 10 : index
-// CHECK: [[C9:%.*]] = constant 9 : index
+// CHECK: [[C3:%.*]] = constant 3 : index
 // CHECK: [[C6:%.*]] = constant 6 : index
+// CHECK: [[C9:%.*]] = constant 9 : index
+// CHECK: [[C10:%.*]] = constant 10 : index
 // CHECK: [[C4:%.*]] = constant 4 : index
-// CHECK: [[C3:%.*]] = constant 3 : index
-// CHECK: [[C2:%.*]] = constant 2 : index
-// CHECK: [[C1:%.*]] = constant 1 : index
+// CHECK: [[C12:%.*]] = constant 12 : index
 // CHECK: [[C0:%.*]] = constant 0 : index
+// CHECK: [[C1:%.*]] = constant 1 : index
+// CHECK: [[C2:%.*]] = constant 2 : index
 // CHECK: scf.parallel ([[NEW_I0:%.*]]) = ([[C0]]) to ([[C4]]) step ([[C1]]) {
 // CHECK:   [[V0:%.*]] = remi_signed [[NEW_I0]], [[C2]] : index
 // CHECK:   [[I0:%.*]] = divi_signed [[NEW_I0]], [[C2]] : index
diff --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
index 496f735..2a516c4 100644
--- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
@@ -14,13 +14,13 @@ func @collapse_to_single() {
 }
 
 // CHECK-LABEL: func @collapse_to_single() {
-// CHECK-DAG:         [[C18:%.*]] = constant 18 : index
-// CHECK-DAG:         [[C6:%.*]] = constant 6 : index
-// CHECK-DAG:         [[C3:%.*]] = constant 3 : index
-// CHECK-DAG:         [[C7:%.*]] = constant 7 : index
-// CHECK-DAG:         [[C4:%.*]] = constant 4 : index
-// CHECK-DAG:         [[C1:%.*]] = constant 1 : index
-// CHECK-DAG:         [[C0:%.*]] = constant 0 : index
+// CHECK:         [[C7:%.*]] = constant 7 : index
+// CHECK:         [[C3:%.*]] = constant 3 : index
+// CHECK:         [[C4:%.*]] = constant 4 : index
+// CHECK:         [[C18:%.*]] = constant 18 : index
+// CHECK:         [[C6:%.*]] = constant 6 : index
+// CHECK:         [[C0:%.*]] = constant 0 : index
+// CHECK:         [[C1:%.*]] = constant 1 : index
 // CHECK:         scf.parallel ([[NEW_I:%.*]]) = ([[C0]]) to ([[C18]]) step ([[C1]]) {
 // CHECK:           [[I0_COUNT:%.*]] = remi_signed [[NEW_I]], [[C6]] : index
 // CHECK:           [[I1_COUNT:%.*]] = divi_signed [[NEW_I]], [[C6]] : index
diff --git a/mlir/test/Transforms/test-canonicalize.mlir b/mlir/test/Transforms/test-canonicalize.mlir
index c0033a2..cc6af03 100644
--- a/mlir/test/Transforms/test-canonicalize.mlir
+++ b/mlir/test/Transforms/test-canonicalize.mlir
@@ -52,25 +52,6 @@ func @test_commutative_multi(%arg0: i32, %arg1: i32) -> (i32, i32) {
   return %y, %z: i32, i32
 }
 
-
-// CHECK-LABEL: func @test_commutative_multi_cst
-func @test_commutative_multi_cst(%arg0: i32, %arg1: i32) -> (i32, i32) {
-  // CHECK-NEXT: %c42_i32 = constant 42 : i32
-  %c42_i32 = constant 42 : i32
-  %c42_i32_2 = constant 42 : i32
-  // CHECK-NEXT: %[[O0:.*]] = "test.op_commutative"(%arg0, %arg1, %c42_i32, %c42_i32) : (i32, i32, i32, i32) -> i32
-  %y = "test.op_commutative"(%c42_i32, %arg0, %arg1, %c42_i32_2) : (i32, i32, i32, i32) -> i32
-
-  %c42_i32_3 = constant 42 : i32
-
-  // CHECK-NEXT: %[[O1:.*]] = "test.op_commutative"(%arg0, %arg1, %c42_i32, %c42_i32) : (i32, i32, i32, i32) -> i32
-  %z = "test.op_commutative"(%arg0, %c42_i32_3, %c42_i32_2, %arg1): (i32, i32, i32, i32) -> i32
-  // CHECK-NEXT: return %[[O0]], %[[O1]]
-  return %y, %z: i32, i32
-}
-
-// CHECK-LABEL: func @typemismatch
-
 func @typemismatch() -> i32 {
   %c42 = constant 42.0 : f32
 
diff --git a/mlir/test/mlir-tblgen/pattern.mlir b/mlir/test/mlir-tblgen/pattern.mlir
index dd277e2..0425cf8 100644
--- a/mlir/test/mlir-tblgen/pattern.mlir
+++ b/mlir/test/mlir-tblgen/pattern.mlir
@@ -5,8 +5,8 @@ func @verifyFusedLocs(%arg0 : i32) -> i32 {
   %0 = "test.op_a"(%arg0) {attr = 10 : i32} : (i32) -> i32 loc("a")
   %result = "test.op_a"(%0) {attr = 20 : i32} : (i32) -> i32 loc("b")
 
-  // CHECK:  %0 = "test.op_b"(%arg0) {attr = 10 : i32} : (i32) -> i32 loc("a")
-  // CHECK:  %1 = "test.op_b"(%0) {attr = 20 : i32} : (i32) -> i32 loc("b")
+  // CHECK: "test.op_b"(%arg0) {attr = 10 : i32} : (i32) -> i32 loc("a")
+  // CHECK: "test.op_b"(%arg0) {attr = 20 : i32} : (i32) -> i32 loc(fused["b", "a"])
   return %result : i32
 }
 
@@ -67,7 +67,7 @@ func @verifyBenefit(%arg0 : i32) -> i32 {
   %2 = "test.op_g"(%1) : (i32) -> i32
 
   // CHECK: "test.op_f"(%arg0)
-  // CHECK: "test.op_b"(%arg0) {attr = 20 : i32}
+  // CHECK: "test.op_b"(%arg0) {attr = 34 : i32}
   return %0 : i32
 }
 
-- 
2.7.4