From 5fc28ebbaf11808f0a010239f46875458589ce0d Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <nicolas.vasilache@gmail.com>
Date: Tue, 4 Oct 2022 05:14:30 -0700
Subject: [PATCH] [mlir][Linalg] NFC - Add bbarg pretty printing to
 linalg::generic

Differential Revision: https://reviews.llvm.org/D135151
---
 .../mlir/Dialect/Linalg/IR/LinalgStructuredOps.td  |   4 +-
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp           |  11 +-
 mlir/test/Analysis/test-match-reduction.mlir       |  10 +-
 .../TosaToLinalg/tosa-to-linalg-named.mlir         |  39 +-
 .../Conversion/TosaToLinalg/tosa-to-linalg.mlir    | 396 ++++++++++++---------
 .../Linalg/canonicalize-duplicate-inputs.mlir      |  18 +-
 mlir/test/Dialect/Linalg/decompose-ops.mlir        | 166 ++++-----
 .../Dialect/Linalg/fusion-elementwise-ops.mlir     |  10 +-
 mlir/test/Dialect/Linalg/lower-pad-tensor.mlir     |   4 +-
 mlir/test/Dialect/Linalg/reshape_fusion.mlir       |  12 +-
 10 files changed, 377 insertions(+), 293 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
index 3d1ee2f..1691291 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -58,7 +58,9 @@ class LinalgStructuredBase_Op<string mnemonic, list<Trait> props>
 // Generic Linalg ops.
 //===----------------------------------------------------------------------===//
 
-def GenericOp : LinalgStructuredBase_Op<"generic", [AttrSizedOperandSegments]> {
+def GenericOp : LinalgStructuredBase_Op<"generic", [
+    DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmBlockArgumentNames"]>,
+    AttrSizedOperandSegments]> {
   let description = [{
     Generic Linalg op form where the key properties of the computation are
     specified as attributes. In pretty form, a `linalg.generic` op is written
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 3741e7d..ba60572 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -663,8 +663,17 @@ void FillOp::getCanonicalizationPatterns(RewritePatternSet &results,
 }
 
 //===----------------------------------------------------------------------===//
-// GenericOps
+// GenericOp
 //===----------------------------------------------------------------------===//
+
+void GenericOp::getAsmBlockArgumentNames(Region &region, // Not read below.
+                                         OpAsmSetValueNameFn setNameFn) {
+  for (Value v : getRegionInputArgs()) // Input bbargs get the hint "in".
+    setNameFn(v, "in");
+  for (Value v : getRegionOutputArgs()) // Output bbargs get the hint "out".
+    setNameFn(v, "out");
+}
+
 void GenericOp::build(
     OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes,
     ValueRange inputs, ValueRange outputs, ArrayAttr indexingMaps,
diff --git a/mlir/test/Analysis/test-match-reduction.mlir b/mlir/test/Analysis/test-match-reduction.mlir
index ef99e76..ecc74c6 100644
--- a/mlir/test/Analysis/test-match-reduction.mlir
+++ b/mlir/test/Analysis/test-match-reduction.mlir
@@ -7,7 +7,7 @@
 func.func @linalg_red_add(%in0t : tensor<?xf32>, %out0t : tensor<1xf32>) {
   // expected-remark@below {{Reduction found in output #0!}}
   // expected-remark@below {{Reduced Value: <block argument> of type 'f32' at index: 0}}
-  // expected-remark@below {{Combiner Op: %1 = arith.addf %arg2, %arg3 : f32}}
+  // expected-remark@below {{Combiner Op: %1 = arith.addf }}
   %red = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
                                           affine_map<(d0) -> (0)>],
                                           iterator_types = ["reduction"]}
@@ -27,8 +27,8 @@ func.func @affine_red_add(%in: memref<256x512xf32>, %out: memref<256xf32>) {
  %cst = arith.constant 0.000000e+00 : f32
  affine.for %i = 0 to 256 {
    // expected-remark@below {{Reduction found in output #0!}}
-   // expected-remark@below {{Reduced Value: %1 = affine.load %arg0[%arg2, %arg3] : memref<256x512xf32>}}
-   // expected-remark@below {{Combiner Op: %2 = arith.addf %arg4, %1 : f32}}
+   // expected-remark@below {{Reduced Value: %1 = affine.load }}
+   // expected-remark@below {{Combiner Op: %2 = arith.addf }}
    %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
      %ld = affine.load %in[%i, %j] : memref<256x512xf32>
      %add = arith.addf %red_iter, %ld : f32
@@ -63,8 +63,8 @@ func.func @linalg_red_max(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) {
 // expected-remark@below {{Testing function}}
 func.func @linalg_fused_red_add(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) {
   // expected-remark@below {{Reduction found in output #0!}}
-  // expected-remark@below {{Reduced Value: %2 = arith.subf %1, %arg2 : f32}}
-  // expected-remark@below {{Combiner Op: %3 = arith.addf %2, %arg3 : f32}}
+  // expected-remark@below {{Reduced Value: %2 = arith.subf}}
+  // expected-remark@below {{Combiner Op: %3 = arith.addf}}
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
    iterator_types = ["parallel", "reduction"]}
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
index 9bc5db3..811bf28 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
@@ -80,8 +80,8 @@ func.func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2
   // CHECK: [[INITB:%.+]] = tensor.empty()
   // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32>
   // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) {
-  // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
-  // CHECK:   [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32
+  // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32):
+  // CHECK:   [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32
   // CHECK:   linalg.yield [[ADD]] : f32
 
   %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>)  -> (tensor<5x6xf32>)
@@ -129,8 +129,8 @@ func.func @fully_connected_dyn(%arg0: tensor<?x3xf32>, %arg1: tensor<6x3xf32>, %
   // CHECK: %[[INITB:.+]] = tensor.empty(%[[DIM]])
   // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor<?x3xf32>, tensor<3x6xf32>) outs(%[[FILL]] : tensor<?x6xf32>) -> tensor<?x6xf32>
   // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor<?x6xf32>) outs(%[[INITB]] : tensor<?x6xf32>) {
-  // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
-  // CHECK:   %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32
+  // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32):
+  // CHECK:   %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32
   // CHECK:   linalg.yield %[[ADD]] : f32
 
   %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<?x3xf32>, tensor<6x3xf32>, tensor<6xf32>)  -> (tensor<?x6xf32>)
@@ -214,6 +214,7 @@ func.func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) {
   // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>)
   // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>)
+  // CHECK: ^bb0(%[[BBARG1:[a-zA-Z0-9_]+]]: f32,
   // CHECK:   [[ZERO:%.0]] = arith.constant 0
   // CHECK:   [[ONE:%.+]] = arith.constant 1
   // CHECK:   [[HEIGHT:%.+]] = arith.constant 4
@@ -255,7 +256,7 @@ func.func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) {
   // CHECK:   [[C:%.+]] = arith.muli [[YSEL]], [[XSEL]]
   // CHECK:   [[CI:%.+]] = arith.index_cast [[C]]
   // CHECK:   [[CF:%.+]] = arith.sitofp [[CI]]
-  // CHECK:   [[RESULT:%.+]] = arith.divf %arg1, [[CF]]
+  // CHECK:   [[RESULT:%.+]] = arith.divf %[[BBARG1]], [[CF]]
   // CHECK:   linalg.yield [[RESULT]]
   %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<1x6x34x62xf32>)  -> (tensor<1x5x33x62xf32>)
   return %0 : tensor<1x5x33x62xf32>
@@ -286,10 +287,11 @@ func.func @avg_pool_i8(%arg0 : tensor<1x128x128x2xi8>) -> () {
 
   // CHECK: linalg.pooling_nhwc_sum
   // CHECK: linalg.generic
+  // CHECK: ^bb0(%[[BBARG1:[a-zA-Z0-9_]+]]: i32,
 
   // CHECK: %[[INZP:.+]] = arith.constant -128
   // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]]
-  // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]]
+  // CHECK: %[[OFFSETED:.+]] = arith.subi %[[BBARG1]], %[[INZP_OFF]]
   // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825
   // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}}
   // CHECK: %[[SHIFT:.+]] = arith.constant 30
@@ -315,10 +317,11 @@ func.func @avg_pool_i16(%arg0 : tensor<1x128x128x2xi16>) -> () {
 
   // CHECK: linalg.pooling_nhwc_sum
   // CHECK: linalg.generic
+  // CHECK: ^bb0(%[[BBARG1:[a-zA-Z0-9_]+]]: i32,
 
   // CHECK: %[[INZP:.+]] = arith.constant -128
   // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]]
-  // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]]
+  // CHECK: %[[OFFSETED:.+]] = arith.subi %[[BBARG1]], %[[INZP_OFF]]
   // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825
   // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}}
   // CHECK: %[[SHIFT:.+]] = arith.constant 30
@@ -479,8 +482,8 @@ func.func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf
   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
   // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
   // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
-  // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
-  // CHECK:   [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32
+  // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32):
+  // CHECK:   [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32
   // CHECK:   linalg.yield [[ADD]] : f32
   // CHECK: } -> tensor<1x5x5x33xf32>
   %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>)  -> (tensor<1x5x5x33xf32>)
@@ -503,8 +506,8 @@ func.func @depthwise_conv_dyn(%arg0 : tensor<?x7x5x3xf32>, %arg1 : tensor<3x1x3x
   // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<?x7x5x3xf32>, tensor<3x1x3x11xf32>) outs(%[[FILL]] : tensor<?x5x5x3x11xf32>)
   // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
   // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor<?x5x5x33xf32>) outs(%[[OUT]] : tensor<?x5x5x33xf32>) {
-  // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
-  // CHECK:   %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32
+  // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32):
+  // CHECK:   %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32
   // CHECK:   linalg.yield %[[ADD]] : f32
   // CHECK: } -> tensor<?x5x5x33xf32>
   %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<?x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>)  -> (tensor<?x5x5x33xf32>)
@@ -525,8 +528,8 @@ func.func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3
   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
   // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
   // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
-  // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):  
-  // CHECK:   [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32
+  // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32):
+  // CHECK:   [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32
   // CHECK:   linalg.yield [[ADD]] : f32
   // CHECK: } -> tensor<1x5x5x33xf32>
   %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [2, 2], dilation = [1, 1] } : (tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>)  -> (tensor<1x5x5x33xf32>)
@@ -553,8 +556,8 @@ func.func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3
   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>)
   // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
   // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) {
-  // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32):  
-  // CHECK:   [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32
+  // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32):
+  // CHECK:   [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32
   // CHECK:   linalg.yield [[ADD]] : i32
   // CHECK: } -> tensor<1x12x12x512xi32>
   %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 1, 1, 1], quantization_info = #tosa.conv_quant<input_zp = -128, weight_zp = 42>, stride = [1, 1], dilation = [1, 1] } : (tensor<1x12x12x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>)  -> tensor<1x12x12x512xi32>
@@ -577,8 +580,8 @@ func.func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 :
   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>)
   // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
   // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) {
-  // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32):  
-  // CHECK:   [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32
+  // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32):
+  // CHECK:   [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32
   // CHECK:   linalg.yield [[ADD]] : i32
   // CHECK: } -> tensor<1x10x10x512xi32>
   %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [0, 0, 0, 0], quantization_info = #tosa.conv_quant<input_zp = -128, weight_zp = 42>, stride = [1, 1], dilation = [2, 2] } : (tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>)  -> tensor<1x10x10x512xi32>
@@ -592,7 +595,7 @@ func.func @depthwise_conv2d_dyn_w_h(%arg0: tensor<2x?x?x3xf32>, %arg1: tensor<3x
   // CHECK: arith.muli
   // CHECK: arith.divui
   // CHECK: %[[PADDED:.+]] = tensor.pad %arg0 low[0, 1, 3, 0] high[0, 2, 4, 0] {
-  // CHECK: ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index):
+  // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: index, %[[ARG4:[0-9a-zA-Z_]+]]: index, %[[ARG5:[0-9a-zA-Z_]+]]: index, %[[ARG6:[0-9a-zA-Z_]+]]: index):
   // CHECK: tensor.yield %cst : f32
   // CHECK:  } : tensor<2x?x?x3xf32> to tensor<2x?x?x3xf32>
   // CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) outs(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32>
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
index 51fb0e6..0c8af01 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -3,11 +3,12 @@
 // CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
 
 // CHECK-LABEL: @test_abs
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
 func.func @test_abs(%arg0: tensor<f32>) -> tensor<f32> {
   // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<f32>
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%arg0 : tensor<f32>) outs([[INIT]] : tensor<f32>) {
-  // CHECK: ^bb0(%arg1: f32, %arg2: f32):
-  // CHECK:   [[ELEMENT:%.+]] = math.absf %arg1
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%[[ARG0]] : tensor<f32>) outs([[INIT]] : tensor<f32>) {
+  // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32):
+  // CHECK:   [[ELEMENT:%.+]] = math.absf %[[ARG1]]
   // CHECK:   linalg.yield [[ELEMENT]] : f32
   // CHECK: } -> tensor<f32>
 
@@ -22,11 +23,12 @@ func.func @test_abs(%arg0: tensor<f32>) -> tensor<f32> {
 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: @test_abs
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
 func.func @test_abs(%arg0: tensor<2xf32>) -> tensor<2xf32> {
   // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32>
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) {
-  // CHECK: ^bb0(%arg1: f32, %arg2: f32):
-  // CHECK:   [[ELEMENT:%.+]] = math.absf %arg1
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) {
+  // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32):
+  // CHECK:   [[ELEMENT:%.+]] = math.absf %[[ARG1]]
   // CHECK:   linalg.yield [[ELEMENT]] : f32
   // CHECK: } -> tensor<2xf32>
   %0 = "tosa.abs"(%arg0) : (tensor<2xf32>) -> tensor<2xf32>
@@ -40,11 +42,12 @@ func.func @test_abs(%arg0: tensor<2xf32>) -> tensor<2xf32> {
 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
 
 // CHECK-LABEL: @test_abs
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
 func.func @test_abs(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> {
   // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32>
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<2x3xf32>) outs([[INIT]] : tensor<2x3xf32>) {
-  // CHECK: ^bb0(%arg1: f32, %arg2: f32):
-  // CHECK:   [[ELEMENT:%.+]] = math.absf %arg1
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xf32>) outs([[INIT]] : tensor<2x3xf32>) {
+  // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32):
+  // CHECK:   [[ELEMENT:%.+]] = math.absf %[[ARG1]]
   // CHECK:   linalg.yield [[ELEMENT]] : f32
   // CHECK: } -> tensor<2x3xf32>
   %0 = "tosa.abs"(%arg0) : (tensor<2x3xf32>) -> tensor<2x3xf32>
@@ -56,9 +59,10 @@ func.func @test_abs(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> {
 // -----
 
 // CHECK-LABEL: @test_abs
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
 func.func @test_abs(%arg0: tensor<?xf32>) -> tensor<?xf32> {
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
+  // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: linalg.generic
   // CHECK: math.absf
@@ -71,9 +75,10 @@ func.func @test_abs(%arg0: tensor<?xf32>) -> tensor<?xf32> {
 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
 
 // CHECK-LABEL: @test_abs_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
 func.func @test_abs_dyn(%arg0: tensor<2x?xf32>) -> tensor<2x?xf32> {
   // CHECK: %[[C1:.+]] = arith.constant 1
-  // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C1]]
+  // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C1]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: linalg.generic
   // CHECK: math.absf
@@ -87,12 +92,14 @@ func.func @test_abs_dyn(%arg0: tensor<2x?xf32>) -> tensor<2x?xf32> {
 // CHECK: #[[$MAP1:.*]] = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: @test_broadcast
+// CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]: tensor<1xf32>
+// CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: tensor<2xf32>
 func.func @test_broadcast(%arg0: tensor<1xf32>, %arg1: tensor<2xf32>) -> tensor<2xf32> {
   // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32>
-  // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %arg0
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %arg1 : tensor<f32>, tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) {
-  // CHECK: ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
-  // CHECK:   [[ELEMENT:%.+]] = arith.addf %arg2, %arg3 : f32
+  // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %[[ARG0]]
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %[[ARG1]] : tensor<f32>, tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) {
+  // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32):
+  // CHECK:   [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32
   // CHECK:   linalg.yield [[ELEMENT]] : f32
   // CHECK: } -> tensor<2xf32>
   %0 = "tosa.add"(%arg0, %arg1) : (tensor<1xf32>, tensor<2xf32>) -> tensor<2xf32>
@@ -105,12 +112,14 @@ func.func @test_broadcast(%arg0: tensor<1xf32>, %arg1: tensor<2xf32>) -> tensor<
 // CHECK: #[[$MAP1:.*]] = affine_map<(d0) -> ()>
 
 // CHECK-LABEL: @test_broadcast_swapped_args
+// CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]: tensor<2xf32>
+// CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: tensor<1xf32>
 func.func @test_broadcast_swapped_args(%arg0: tensor<2xf32>, %arg1: tensor<1xf32>) -> tensor<2xf32> {
   // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32>
-  // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %arg1
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0, [[RESHAPE]] : tensor<2xf32>, tensor<f32>) outs([[INIT]] : tensor<2xf32>) {
-  // CHECK: ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
-  // CHECK:   [[ELEMENT:%.+]] = arith.addf %arg2, %arg3 : f32
+  // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %[[ARG1]]
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[RESHAPE]] : tensor<2xf32>, tensor<f32>) outs([[INIT]] : tensor<2xf32>) {
+  // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32):
+  // CHECK:   [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32
   // CHECK:   linalg.yield [[ELEMENT]] : f32
   // CHECK: } -> tensor<2xf32>
   %0 = "tosa.add"(%arg0, %arg1) : (tensor<2xf32>, tensor<1xf32>) -> tensor<2xf32>
@@ -124,13 +133,15 @@ func.func @test_broadcast_swapped_args(%arg0: tensor<2xf32>, %arg1: tensor<1xf32
 // CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0)>
 
 // CHECK-LABEL: @test_multibroadcast
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
+// CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]
 func.func @test_multibroadcast(%arg0: tensor<1x3xf32>, %arg1: tensor<2x1xf32>) -> tensor<2x3xf32> {
   // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32>
-  // CHECK: [[RESHAPE1:%.+]] = tensor.collapse_shape %arg0 {{\[}}[0, 1]]
-  // CHECK: [[RESHAPE2:%.+]] = tensor.collapse_shape %arg1 {{\[}}[0, 1]]
+  // CHECK: [[RESHAPE1:%.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1]]
+  // CHECK: [[RESHAPE2:%.+]] = tensor.collapse_shape %[[ARG1]] {{\[}}[0, 1]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) outs([[INIT]] : tensor<2x3xf32>) {
-  // CHECK: ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
-  // CHECK:   [[ELEMENT:%.+]] = arith.addf %arg2, %arg3 : f32
+  // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32):
+  // CHECK:   [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32
   // CHECK:   linalg.yield [[ELEMENT]] : f32
   // CHECK: } -> tensor<2x3xf32>
   %0 = "tosa.add"(%arg0, %arg1) : (tensor<1x3xf32>, tensor<2x1xf32>) -> tensor<2x3xf32>
@@ -315,8 +326,9 @@ func.func @test_simple_i32(%arg0: tensor<1xi32>) -> () {
   %4 = "tosa.div"(%arg0, %arg0) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 
   // CHECK: linalg.generic
+  // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32):
   // CHECK: [[ZERO:%.+]] = arith.constant 0
-  // CHECK: arith.subi [[ZERO]], %arg1
+  // CHECK: arith.subi [[ZERO]], %[[ARG1]]
   %5 = "tosa.negate"(%arg0) : (tensor<1xi32>) -> tensor<1xi32>
 
   // CHECK: linalg.generic
@@ -503,8 +515,9 @@ func.func @test_bool(%arg0: tensor<1xi1>, %arg1: tensor<1xi1>) -> () {
 // CHECK-LABEL: @test_negate_quantized
 func.func @test_negate_quantized(%arg0: tensor<1xi8>) -> () {
   // CHECK: linalg.generic
+  // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
   // CHECK: [[ZERO:%.+]] = arith.constant 0
-  // CHECK: [[EXT:%.+]] = arith.extsi %arg1 : i8 to i16
+  // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i16
   // CHECK: [[SUB:%.+]] = arith.subi [[ZERO]], [[EXT]]
   // CHECK: [[MIN:%.+]] = arith.constant -128
   // CHECK: [[MAX:%.+]] = arith.constant 127
@@ -517,11 +530,13 @@ func.func @test_negate_quantized(%arg0: tensor<1xi8>) -> () {
   %0 = "tosa.negate"(%arg0) {quantization_info = #tosa.unary_quant<input_zp = 0, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
 
   // CHECK: linalg.generic
-  // CHECK: [[EXT:%.+]] = arith.extsi %arg1 : i8 to i16
+  // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
+  // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i16
   %1 = "tosa.negate"(%arg0) {quantization_info = #tosa.unary_quant<input_zp = 32639, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
 
   // CHECK: linalg.generic
-  // CHECK: [[EXT:%.+]] = arith.extsi %arg1 : i8 to i32
+  // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
+  // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i32
   %2 = "tosa.negate"(%arg0) {quantization_info = #tosa.unary_quant<input_zp = 32640, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
 
   return
@@ -530,8 +545,9 @@ func.func @test_negate_quantized(%arg0: tensor<1xi8>) -> () {
 // -----
 
 // CHECK-LABEL: @test_reshape_downrank
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
 func.func @test_reshape_downrank(%arg0: tensor<2x3xf32>) -> tensor<6xf32> {
-  // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %arg0 {{\[}}[0, 1]]
+  // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1]]
   %0 = "tosa.reshape"(%arg0) {new_shape = [6]} : (tensor<2x3xf32>) -> tensor<6xf32>
   // CHECK: return [[RESHAPE]]
   return %0 : tensor<6xf32>
@@ -540,8 +556,9 @@ func.func @test_reshape_downrank(%arg0: tensor<2x3xf32>) -> tensor<6xf32> {
 // -----
 
 // CHECK-LABEL: @test_reshape_downrank_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
 func.func @test_reshape_downrank_dyn(%arg0: tensor<2x?xf32>) -> tensor<?xf32> {
-  // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %arg0 {{\[}}[0, 1]]
+  // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1]]
   %0 = "tosa.reshape"(%arg0) {new_shape = [-1]} : (tensor<2x?xf32>) -> tensor<?xf32>
   // CHECK: return [[RESHAPE]]
   return %0 : tensor<?xf32>
@@ -550,8 +567,9 @@ func.func @test_reshape_downrank_dyn(%arg0: tensor<2x?xf32>) -> tensor<?xf32> {
 // -----
 
 // CHECK-LABEL: @test_reshape_uprank
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
 func.func @test_reshape_uprank(%arg0: tensor<6xf32>) -> tensor<2x3xf32> {
-  // CHECK: [[RESHAPE:%.+]] = tensor.expand_shape %arg0 {{\[}}[0, 1]]
+  // CHECK: [[RESHAPE:%.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1]]
   %0 = "tosa.reshape"(%arg0) {new_shape = [2, 3]} : (tensor<6xf32>) -> tensor<2x3xf32>
   // CHECK: return [[RESHAPE]]
   return %0 : tensor<2x3xf32>
@@ -560,8 +578,9 @@ func.func @test_reshape_uprank(%arg0: tensor<6xf32>) -> tensor<2x3xf32> {
 // -----
 
 // CHECK-LABEL: @test_reshape_uprank_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
 func.func @test_reshape_uprank_dyn(%arg0: tensor<?xf32>) -> tensor<2x?xf32> {
-  // CHECK: [[RESHAPE:%.+]] = tensor.expand_shape %arg0 {{\[}}[0, 1]]
+  // CHECK: [[RESHAPE:%.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1]]
   %0 = "tosa.reshape"(%arg0) {new_shape = [2, -1]} : (tensor<?xf32>) -> tensor<2x?xf32>
   // CHECK: return [[RESHAPE]]
   return %0 : tensor<2x?xf32>
@@ -570,8 +589,8 @@ func.func @test_reshape_uprank_dyn(%arg0: tensor<?xf32>) -> tensor<2x?xf32> {
 // -----
 
 // CHECK-LABEL: @test_reshape_samerank
+//  CHECK-SAME: (%[[ARG0:.*]]: tensor<3x2xf32>)
 func.func @test_reshape_samerank(%arg0: tensor<3x2xf32>) -> tensor<2x3xf32> {
-  // CHECK-SAME: (%[[ARG0:.*]]: tensor<3x2xf32>)
   // CHECK-NEXT: %[[RESHAPE1:.*]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1]]
   // CHECK-NEXT: %[[RESHAPE2:.*]] = tensor.expand_shape %[[RESHAPE1]] {{\[}}[0, 1]]
   %0 = "tosa.reshape"(%arg0) {new_shape = [2, 3]} : (tensor<3x2xf32>) -> tensor<2x3xf32>
@@ -582,8 +601,8 @@ func.func @test_reshape_samerank(%arg0: tensor<3x2xf32>) -> tensor<2x3xf32> {
 // -----
 
 // CHECK-LABEL: @test_reshape_samerank_dyn
+//  CHECK-SAME: (%[[ARG0:.*]]: tensor<?x2xf32>)
 func.func @test_reshape_samerank_dyn(%arg0: tensor<?x2xf32>) -> tensor<2x?xf32> {
-  // CHECK-SAME: (%[[ARG0:.*]]: tensor<?x2xf32>)
   // CHECK-NEXT: %[[RESHAPE1:.*]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1]]
   // CHECK-NEXT: %[[RESHAPE2:.*]] = tensor.expand_shape %[[RESHAPE1]] {{\[}}[0, 1]]
   %0 = "tosa.reshape"(%arg0) {new_shape = [2, -1]} : (tensor<?x2xf32>) -> tensor<2x?xf32>
@@ -594,8 +613,9 @@ func.func @test_reshape_samerank_dyn(%arg0: tensor<?x2xf32>) -> tensor<2x?xf32>
 // -----
 
 // CHECK-LABEL: @test_reshape_downrank_6D
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @test_reshape_downrank_6D(%arg0: tensor<1x2x3x5x7x11xf32>) -> tensor<6x5x77xf32> {
-  // CHECK: tensor.collapse_shape %arg0 {{\[}}[0, 1, 2], [3], [4, 5]]
+  // CHECK: tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3], [4, 5]]
   %0 = "tosa.reshape"(%arg0) {new_shape = [6, 5, 77]} : (tensor<1x2x3x5x7x11xf32>) -> tensor<6x5x77xf32>
   return %0 : tensor<6x5x77xf32>
 }
@@ -603,9 +623,10 @@ func.func @test_reshape_downrank_6D(%arg0: tensor<1x2x3x5x7x11xf32>) -> tensor<6
 // -----
 
 // CHECK-LABEL: @test_reshape_downrank_6D_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @test_reshape_downrank_6D_dyn(%arg0: tensor<1x2x?x5x7x11xf32>) -> tensor<?x5x77xf32> {
-  // CHECK: tensor.collapse_shape {{.*}}[0, 1, 2, 3, 4, 5]
-  // CHECK: tensor.expand_shape {{.*}}[0, 1, 2]
+  // CHECK: tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2, 3, 4, 5]]
+  // CHECK: tensor.expand_shape %{{.*}} {{\[}}[0, 1, 2]]
   %0 = "tosa.reshape"(%arg0) {new_shape = [-1, 5, 77]} : (tensor<1x2x?x5x7x11xf32>) -> tensor<?x5x77xf32>
   return %0 : tensor<?x5x77xf32>
 }
@@ -613,11 +634,13 @@ func.func @test_reshape_downrank_6D_dyn(%arg0: tensor<1x2x?x5x7x11xf32>) -> tens
 // -----
 
 // CHECK-LABEL: @test_identity
+// CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]: tensor<1xf32>,
+// CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: tensor<1xi32>
 func.func @test_identity(%arg0: tensor<1xf32>, %arg1: tensor<1xi32>) -> (tensor<1xf32>, tensor<1xi32>) {
   %0 = "tosa.identity"(%arg0) : (tensor<1xf32>) -> tensor<1xf32>
   %1 = "tosa.identity"(%arg1) : (tensor<1xi32>) -> tensor<1xi32>
 
-  // CHECK: return %arg0, %arg1
+  // CHECK: return %[[ARG0]], %[[ARG1]]
   return %0, %1 : tensor<1xf32>, tensor<1xi32>
 }
 
@@ -649,7 +672,7 @@ func.func @test_transpose(%arg0: tensor<1x2x3xi32>) -> () {
 func.func @test_transpose_dyn(%arg0: tensor<1x?x3x4xi32>) -> () {
   %0 = arith.constant dense<[1, 3, 0, 2]> : tensor<4xi32>
   // CHECK: %[[C1:.+]] = arith.constant 1
-  // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C1]]
+  // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C1]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) : tensor<?x4x1x3xi32>
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) outs([[OUT:%.+]] : tensor<?x4x1x3xi32>)
   // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32)
@@ -669,9 +692,9 @@ func.func @test_transpose_dyn(%arg0: tensor<1x?x3x4xi32>) -> () {
 func.func @test_transpose_dyn_multiple(%arg0: tensor<?x?xf32>) -> () {
   %0 = arith.constant dense<[1, 0]> : tensor<2xi32>
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[DIM0:.+]] = tensor.dim %arg0, %[[C0]]
+  // CHECK: %[[DIM0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
   // CHECK: %[[C1:.+]] = arith.constant 1
-  // CHECK: %[[DIM1:.+]] = tensor.dim %arg0, %[[C1]]
+  // CHECK: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM0]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x?xf32>) outs([[OUT:%.+]] : tensor<?x?xf32>)
   // CHECK: ^bb0([[ARG1:%.+]]: f32, [[ARG2:%.+]]: f32)
@@ -694,8 +717,8 @@ func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () {
   // CHECK: [[CST0:%.+]] = arith.constant 0.0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>)
-  // CHECK: ^bb0(%arg1: f32, %arg2: f32)
-  // CHECK:   [[RES:%.+]] = arith.addf %arg1, %arg2 : f32
+  // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32)
+  // CHECK:   [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
   // CHECK:   linalg.yield [[RES]] : f32
   // CHECK: tensor.expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<4xf32> into tensor<1x4xf32>
   %0 = "tosa.reduce_sum"(%arg0) {axis = 0 : i64} : (tensor<5x4xf32>) -> tensor<1x4xf32>
@@ -704,8 +727,8 @@ func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () {
   // CHECK: [[CST0:%.+]] = arith.constant 0.0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>)
-  // CHECK: ^bb0(%arg1: f32, %arg2: f32)
-  // CHECK:   [[RES:%.+]] = arith.addf %arg1, %arg2 : f32
+  // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32)
+  // CHECK:   [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
   // CHECK:   linalg.yield [[RES]] : f32
   // CHECK: tensor.expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<5xf32> into tensor<5x1xf32>
   %1 = "tosa.reduce_sum"(%arg0) {axis = 1 : i64} : (tensor<5x4xf32>) -> tensor<5x1xf32>
@@ -736,15 +759,16 @@ func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () {
 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
 
 // CHECK-LABEL: @reduce_float_dyn
+// CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]: tensor<?x5x4xf32>
 func.func @reduce_float_dyn(%arg0: tensor<?x5x4xf32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[C0]]
+  // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<?x4xf32>
   // CHECK: %[[CST0:.+]] = arith.constant 0.0
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]]
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%arg0 : tensor<?x5x4xf32>) outs(%[[FILL]] : tensor<?x4xf32>)
-  // CHECK: ^bb0(%arg1: f32, %arg2: f32)
-  // CHECK:   %[[RES:.+]] = arith.addf %arg1, %arg2 : f32
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%[[ARG0]] : tensor<?x5x4xf32>) outs(%[[FILL]] : tensor<?x4xf32>)
+  // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32)
+  // CHECK:   %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
   // CHECK:   linalg.yield %[[RES]] : f32
   // CHECK: tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1, 2]] : tensor<?x4xf32> into tensor<?x1x4xf32>
   %0 = "tosa.reduce_sum"(%arg0) {axis = 1 : i64} : (tensor<?x5x4xf32>) -> tensor<?x1x4xf32>
@@ -757,15 +781,16 @@ func.func @reduce_float_dyn(%arg0: tensor<?x5x4xf32>) -> () {
 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
 
 // CHECK-LABEL: @reduce_float_dyn_nonzero_batch
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @reduce_float_dyn_nonzero_batch(%arg0: tensor<5x?x4xf32>) -> () {
   // CHECK: %[[C1:.+]] = arith.constant 1
-  // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[C1]]
+  // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C1]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<5x?xf32>
   // CHECK: %[[CST1:.+]] = arith.constant 1.0
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}outs(%[[INIT]]
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0 : tensor<5x?x4xf32>) outs(%[[FILL]] : tensor<5x?xf32>)
-  // CHECK: ^bb0(%arg1: f32, %arg2: f32)
-  // CHECK:   %[[RES:.+]] = arith.mulf %arg1, %arg2 : f32
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[ARG0]] : tensor<5x?x4xf32>) outs(%[[FILL]] : tensor<5x?xf32>)
+  // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32)
+  // CHECK:   %[[RES:.+]] = arith.mulf %[[ARG1]], %[[ARG2]] : f32
   // CHECK:   linalg.yield %[[RES]] : f32
   // CHECK: tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1, 2]] : tensor<5x?xf32> into tensor<5x?x1xf32>
   %0 = "tosa.reduce_prod"(%arg0) {axis = 2 : i64} : (tensor<5x?x4xf32>) -> tensor<5x?x1xf32>
@@ -778,15 +803,16 @@ func.func @reduce_float_dyn_nonzero_batch(%arg0: tensor<5x?x4xf32>) -> () {
 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
 
 // CHECK-LABEL: @reduce_float_dyn_multiple
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @reduce_float_dyn_multiple(%arg0: tensor<?x?xf32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[C0]]
+  // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
   // CHECK: %[[CMIN:.+]] = arith.constant -3.40282347E+38
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}outs(%[[INIT]]
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<?x?xf32>) outs(%[[FILL]] : tensor<?xf32>)
-  // CHECK: ^bb0(%arg1: f32, %arg2: f32)
-  // CHECK:   %[[MAX:.+]] = arith.maxf %arg1, %arg2 : f32
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<?x?xf32>) outs(%[[FILL]] : tensor<?xf32>)
+  // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32)
+  // CHECK:   %[[MAX:.+]] = arith.maxf %[[ARG1]], %[[ARG2]] : f32
   // CHECK:   linalg.yield %[[MAX]] : f32
   // CHECK: tensor.expand_shape %[[GENERIC]] {{\[}}[0, 1]] : tensor<?xf32> into tensor<?x1xf32>
   %0 = "tosa.reduce_max"(%arg0) {axis = 1 : i64} : (tensor<?x?xf32>) -> tensor<?x1xf32>
@@ -806,8 +832,8 @@ func.func @reduce_int(%arg0: tensor<5x4xi32>) -> () {
   // CHECK: [[CST0:%.+]] = arith.constant 0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>)
-  // CHECK: ^bb0(%arg1: i32, %arg2: i32)
-  // CHECK:   [[RES:%.+]] = arith.addi %arg1, %arg2 : i32
+  // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32)
+  // CHECK:   [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32
   // CHECK:   linalg.yield [[RES]] : i32
   // CHECK: tensor.expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<4xi32> into tensor<1x4xi32>
   %0 = "tosa.reduce_sum"(%arg0) {axis = 0 : i64} : (tensor<5x4xi32>) -> tensor<1x4xi32>
@@ -816,8 +842,8 @@ func.func @reduce_int(%arg0: tensor<5x4xi32>) -> () {
   // CHECK: [[CST0:%.+]] = arith.constant 0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>)
-  // CHECK: ^bb0(%arg1: i32, %arg2: i32)
-  // CHECK:   [[RES:%.+]] = arith.addi %arg1, %arg2 : i32
+  // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32)
+  // CHECK:   [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32
   // CHECK:   linalg.yield [[RES]] : i32
   // CHECK: tensor.expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<5xi32> into tensor<5x1xi32>
   %1 = "tosa.reduce_sum"(%arg0) {axis = 1 : i64} : (tensor<5x4xi32>) -> tensor<5x1xi32>
@@ -856,8 +882,8 @@ func.func @reduce_bool(%arg0: tensor<5x4xi1>) -> () {
   // CHECK: [[CST0:%.+]] = arith.constant true
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) outs([[FILL]] : tensor<4xi1>)
-  // CHECK: ^bb0(%arg1: i1, %arg2: i1)
-  // CHECK:   [[RES:%.+]] = arith.andi %arg1, %arg2 : i1
+  // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i1, %[[ARG2:[0-9a-zA-Z_]+]]: i1)
+  // CHECK:   [[RES:%.+]] = arith.andi %[[ARG1]], %[[ARG2]] : i1
   // CHECK:   linalg.yield [[RES]] : i1
   // CHECK: tensor.expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<4xi1> into tensor<1x4xi1>
   %0 = "tosa.reduce_all"(%arg0) {axis = 0 : i64} : (tensor<5x4xi1>) -> tensor<1x4xi1>
@@ -874,6 +900,8 @@ func.func @reduce_bool(%arg0: tensor<5x4xi1>) -> () {
 // -----
 
 // CHECK-LABEL: @concat
+// CHECK-SAME: %[[ARG0:.+]]: tensor<5x1xf32>
+// CHECK-SAME: %[[ARG1:.+]]: tensor<6x1xf32>
 func.func @concat(%arg0: tensor<5x1xf32>, %arg1: tensor<6x1xf32>) -> () {
   // CHECK: [[AXIS:%.+]] = arith.constant 0
   // CHECK: [[STRIDE:%.+]]   = arith.constant 1
@@ -883,8 +911,8 @@ func.func @concat(%arg0: tensor<5x1xf32>, %arg1: tensor<6x1xf32>) -> () {
   // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<11x1xf32>
   // CHECK: [[CST:%.+]] = arith.constant 0.0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}outs([[INIT]]
-  // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]][0, 0] [5, 1] [1, 1]
-  // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg1 into [[INSERT0]][5, 0] [6, 1] [1, 1]
+  // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %[[ARG0]] into [[FILL]][0, 0] [5, 1] [1, 1]
+  // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %[[ARG1]] into [[INSERT0]][5, 0] [6, 1] [1, 1]
   %0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x1xf32>, tensor<6x1xf32>)  -> (tensor<11x1xf32>)
 
   // CHECK: [[AXIS:%.+]] = arith.constant 1
@@ -895,8 +923,8 @@ func.func @concat(%arg0: tensor<5x1xf32>, %arg1: tensor<6x1xf32>) -> () {
   // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5x2xf32>
   // CHECK: [[CST:%.+]] = arith.constant 0.0
   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}outs([[INIT]]
-  // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]][0, 0] [5, 1] [1, 1]
-  // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg0 into [[INSERT0]][0, 1] [5, 1] [1, 1]
+  // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %[[ARG0]] into [[FILL]][0, 0] [5, 1] [1, 1]
+  // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %[[ARG0]] into [[INSERT0]][0, 1] [5, 1] [1, 1]
   %1 = "tosa.concat"(%arg0, %arg0) { axis = 1 : i64} : (tensor<5x1xf32>, tensor<5x1xf32>)  -> (tensor<5x2xf32>)
   return
 }
@@ -904,20 +932,22 @@ func.func @concat(%arg0: tensor<5x1xf32>, %arg1: tensor<6x1xf32>) -> () {
 // -----
 
 // CHECK-LABEL: @concat_non_axis_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]
 func.func @concat_non_axis_dyn(%arg0: tensor<5x?xf32>, %arg1: tensor<6x?xf32>) -> () {
   // CHECK: %[[AXIS:.+]] = arith.constant 0
   // CHECK: %[[STRIDE:.+]]   = arith.constant 1
   // CHECK: %[[OFFSET:.+]] = arith.constant 0 : index
   // CHECK: %[[IDX0:.+]] = arith.constant 0 : index
   // CHECK: %[[IDX1:.+]] = arith.constant 1 : index
-  // CHECK: %[[SIZE:.+]] = tensor.dim %arg0, %[[IDX1]]
+  // CHECK: %[[SIZE:.+]] = tensor.dim %[[ARG0]], %[[IDX1]]
   // CHECK: %[[IDX1_2:.+]] = arith.constant 1 : index
-  // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[IDX1_2]]
+  // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[IDX1_2]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<11x?xf32>
   // CHECK: %[[CST:.+]] = arith.constant 0.0
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}outs(%[[INIT]]
-  // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %arg0 into %[[FILL]][0, 0] [5, %[[SIZE]]] [1, 1]
-  // CHECK: %[[INSERT1:.+]] = tensor.insert_slice %arg1 into %[[INSERT0]][5, 0] [6, %[[SIZE]]] [1, 1]
+  // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %[[ARG0]] into %[[FILL]][0, 0] [5, %[[SIZE]]] [1, 1]
+  // CHECK: %[[INSERT1:.+]] = tensor.insert_slice %[[ARG1]] into %[[INSERT0]][5, 0] [6, %[[SIZE]]] [1, 1]
   %0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x?xf32>, tensor<6x?xf32>)  -> (tensor<11x?xf32>)
   return
 }
@@ -925,23 +955,25 @@ func.func @concat_non_axis_dyn(%arg0: tensor<5x?xf32>, %arg1: tensor<6x?xf32>) -
 // -----
 
 // CHECK-LABEL: @concat_axis_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]:
 func.func @concat_axis_dyn(%arg0: tensor<?x3xf32>, %arg1: tensor<?x3xf32>) -> () {
   // CHECK: %[[AXIS:.+]] = arith.constant 0
   // CHECK: %[[STRIDE:.+]]   = arith.constant 1
   // CHECK: %[[OFFSET:.+]] = arith.constant 0 : index
   // CHECK: %[[IDX0:.+]] = arith.constant 0 : index
-  // CHECK: %[[SIZE:.+]] = tensor.dim %arg0, %[[IDX0]]
+  // CHECK: %[[SIZE:.+]] = tensor.dim %[[ARG0]], %[[IDX0]]
   // CHECK: %[[IDX0_2:.+]] = arith.constant 0 : index
-  // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[IDX0_2]]
+  // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[IDX0_2]]
   // CHECK: %[[IDX1:.+]] = arith.constant 1 : index
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<?x3xf32>
   // CHECK: %[[CST:.+]] = arith.constant 0.0
   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}outs(%[[INIT]]
-  // CHECK: %[[DYN1:.+]] = tensor.dim %arg0, %[[AXIS]]
-  // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %arg0 into %[[FILL]][0, 0] [%[[DYN1]], 3] [1, 1]
+  // CHECK: %[[DYN1:.+]] = tensor.dim %[[ARG0]], %[[AXIS]]
+  // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %[[ARG0]] into %[[FILL]][0, 0] [%[[DYN1]], 3] [1, 1]
   // CHECK: %[[SUM:.+]]  = arith.addi %[[OFFSET]], %[[DYN1]]
-  // CHECK: %[[DYN2:.+]] = tensor.dim %arg1, %[[AXIS]]
-  // CHECK: %[[INSERT1:.+]] = tensor.insert_slice %arg1 into %[[INSERT0]][%[[SUM]], 0] [%[[DYN2]], 3] [1, 1]
+  // CHECK: %[[DYN2:.+]] = tensor.dim %[[ARG1]], %[[AXIS]]
+  // CHECK: %[[INSERT1:.+]] = tensor.insert_slice %[[ARG1]] into %[[INSERT0]][%[[SUM]], 0] [%[[DYN2]], 3] [1, 1]
   %0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<?x3xf32>, tensor<?x3xf32>)  -> (tensor<?x3xf32>)
   return
 }
@@ -950,11 +982,12 @@ func.func @concat_axis_dyn(%arg0: tensor<?x3xf32>, %arg1: tensor<?x3xf32>) -> ()
 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: @rescale_i8
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @rescale_i8(%arg0 : tensor<2xi8>) -> () {
   // CHECK: [[C0:%.+]] = arith.constant 19689
   // CHECK: [[C1:%.+]] = arith.constant 15
   // CHECK: [[INIT:%.+]] = tensor.empty()
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
   // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
   // CHECK: [[C17:%.+]] = arith.constant 17
   // CHECK: [[C22:%.+]] = arith.constant 22
@@ -975,7 +1008,7 @@ func.func @rescale_i8(%arg0 : tensor<2xi8>) -> () {
   // CHECK: [[C0:%.+]] = arith.constant 19689
   // CHECK: [[C1:%.+]] = arith.constant 15
   // CHECK: [[INIT:%.+]] = tensor.empty()
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xi8>) outs([[INIT]] : tensor<2xui8>)
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xui8>)
   // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: ui8):
   // CHECK: [[C17:%.+]] = arith.constant 17
   // CHECK: [[C22:%.+]] = arith.constant 22
@@ -1003,17 +1036,18 @@ func.func @rescale_i8(%arg0 : tensor<2xi8>) -> () {
 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
 
 // CHECK-LABEL: @rescale_i8_dyn_batch
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @rescale_i8_dyn_batch(%arg0 : tensor<?x2xi8>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
+  // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor<?x2xi8>
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xi8>)
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xi8>)
   %0 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor<?x2xi8>)  -> (tensor<?x2xi8>)
 
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
+  // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor<?x2xui8>
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xui8>)
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xui8>)
   %1 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor<?x2xi8>)  -> (tensor<?x2xui8>)
 
   return
@@ -1024,13 +1058,14 @@ func.func @rescale_i8_dyn_batch(%arg0 : tensor<?x2xi8>) -> () {
 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 
 // CHECK-LABEL: @rescale_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @rescale_dyn(%arg0 : tensor<1x?x?x32xi32>) -> () {
   // CHECK: %[[C1:.+]] = arith.constant 1
-  // CHECK: %[[DIM1:.+]] = tensor.dim %arg0, %[[C1]]
+  // CHECK: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
   // CHECK: %[[C2:.+]] = arith.constant 2
-  // CHECK: %[[DIM2:.+]] = tensor.dim %arg0, %[[C2]]
+  // CHECK: %[[DIM2:.+]] = tensor.dim %[[ARG0]], %[[C2]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM2]])
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<1x?x?x32xi32>) outs(%[[INIT]] : tensor<1x?x?x32xi8>)
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) outs(%[[INIT]] : tensor<1x?x?x32xi8>)
   %0 = "tosa.rescale"(%arg0) {double_round = true, input_zp = 0 : i32, multiplier = [1376784203 : i32], output_zp = 0 : i32, per_channel = false, scale32 = true, shift = [38 : i32]} : (tensor<1x?x?x32xi32>) -> tensor<1x?x?x32xi8>
   return
 }
@@ -1040,11 +1075,12 @@ func.func @rescale_dyn(%arg0 : tensor<1x?x?x32xi32>) -> () {
 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: @rescale_ui8
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @rescale_ui8(%arg0 : tensor<2xui8>) -> () {
   // CHECK: [[C0:%.+]] = arith.constant 19689
   // CHECK: [[C1:%.+]] = arith.constant 15
   // CHECK: [[INIT:%.+]] = tensor.empty()
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xui8>) outs([[INIT]] : tensor<2xi8>)
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xui8>) outs([[INIT]] : tensor<2xi8>)
   // CHECK: ^bb0([[IN:%.+]]: ui8, [[UNUSED:%.+]]: i8):
   // CHECK: [[C17:%.+]] = arith.constant 17
   // CHECK: [[C22:%.+]] = arith.constant 22
@@ -1071,11 +1107,12 @@ func.func @rescale_ui8(%arg0 : tensor<2xui8>) -> () {
 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: @rescale_per_channel
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @rescale_per_channel(%arg0 : tensor<3xi8>) -> (tensor<3xi8>) {
   // CHECK: [[MULTIPLIERS:%.+]] = arith.constant dense<[42, 43, 0]>
   // CHECK: [[SHIFTS:%.+]] = arith.constant dense<[14, 15, 0]>
   // CHECK: [[INIT:%.+]] = tensor.empty()
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0, [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) outs([[INIT]] : tensor<3xi8>)
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) outs([[INIT]] : tensor<3xi8>)
   // CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8):
   // CHECK: [[C243:%.+]] = arith.constant 243
   // CHECK: [[C252:%.+]] = arith.constant 252
@@ -1123,9 +1160,10 @@ func.func @rescaleUnnecessaryDoubleRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8>)
 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
 
 // CHECK-LABEL: @reverse
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @reverse(%arg0: tensor<5x4xi32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[RDIM:.+]] = tensor.dim %arg0, %[[C0]]
+  // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C0]]
   // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
   // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
@@ -1138,7 +1176,7 @@ func.func @reverse(%arg0: tensor<5x4xi32>) -> () {
   %0 = "tosa.reverse"(%arg0) {axis = 0 : i64} : (tensor<5x4xi32>) -> tensor<5x4xi32>
 
   // CHECK: %[[C1:.+]] = arith.constant 1
-  // CHECK: %[[RDIM:.+]] = tensor.dim %arg0, %[[C1]]
+  // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C1]]
   // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
   // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
@@ -1157,11 +1195,12 @@ func.func @reverse(%arg0: tensor<5x4xi32>) -> () {
 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
 
 // CHECK-LABEL: @reverse_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @reverse_dyn(%arg0: tensor<?xi32>) -> () {
   // CHECK: %[[C0_1:.+]] = arith.constant 0
-  // CHECK: %[[D0_1:.+]] = tensor.dim %arg0, %[[C0_1]]
+  // CHECK: %[[D0_1:.+]] = tensor.dim %[[ARG0]], %[[C0_1]]
   // CHECK: %[[C0_2:.+]] = arith.constant 0
-  // CHECK: %[[D0_2:.+]] = tensor.dim %arg0, %[[C0_2]]
+  // CHECK: %[[D0_2:.+]] = tensor.dim %[[ARG0]], %[[C0_2]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[D0_1]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} outs(%[[INIT]] : tensor<?xi32>)
   // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
@@ -1180,22 +1219,26 @@ func.func @reverse_dyn(%arg0: tensor<?xi32>) -> () {
 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 
 // CHECK-LABEL: @tile
+// CHECK-SAME: %[[ARG0:.+]]: tensor<2x3xi8>
 func.func @tile(%arg0 : tensor<2x3xi8>) -> () {
   // CHECK: [[INIT:%.+]] = tensor.empty()
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>)
-  // CHECK:   linalg.yield %arg1 : i8
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>)
+  // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8
+  // CHECK:   linalg.yield %[[ARG1]] : i8
   // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1, 2], [3]]
   %0 = "tosa.tile"(%arg0) {multiples = [2, 1]} : (tensor<2x3xi8>)  -> (tensor<4x3xi8>)
 
   // CHECK: [[INIT:%.+]] = tensor.empty()
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>)
-  // CHECK:   linalg.yield %arg1 : i8
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>)
+  // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8
+  // CHECK:   linalg.yield %[[ARG1]] : i8
   // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]]
   %1 = "tosa.tile"(%arg0) {multiples = [1, 2]} : (tensor<2x3xi8>)  -> (tensor<2x6xi8>)
 
   // CHECK: [[INIT:%.+]] = tensor.empty()
-  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>)
-  // CHECK:   linalg.yield %arg1 : i8
+  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>)
+  // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8
+  // CHECK:   linalg.yield %[[ARG1]] : i8
   // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]]
   %2 = "tosa.tile"(%arg0) {multiples = [5, 7]} : (tensor<2x3xi8>)  -> (tensor<10x21xi8>)
 
@@ -1208,12 +1251,14 @@ func.func @tile(%arg0 : tensor<2x3xi8>) -> () {
 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 
 // CHECK-LABEL: @tile_dyn_input
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @tile_dyn_input(%arg0 : tensor<?x3xi8>) -> () {
   // CHECK: %[[CST0:.+]] = arith.constant 0
-  // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST0]] : tensor<?x3xi8>
+  // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] : tensor<?x3xi8>
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<?x3xi8>) outs(%[[INIT]] : tensor<2x?x1x3xi8>)
-  // CHECK:   linalg.yield %arg1 : i8
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x3xi8>) outs(%[[INIT]] : tensor<2x?x1x3xi8>)
+  // CHECK: ^bb0(%[[ARG1:.+]]: i8,
+  // CHECK:   linalg.yield %[[ARG1]] : i8
   // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[GENERIC]] {{\[}}[0, 1, 2, 3]]
   // CHECK: tensor.expand_shape %[[COLLAPSED]] {{\[}}[0, 1]]
   %0 = "tosa.tile"(%arg0) {multiples = [2, 1]} : (tensor<?x3xi8>)  -> (tensor<?x3xi8>)
@@ -1227,12 +1272,14 @@ func.func @tile_dyn_input(%arg0 : tensor<?x3xi8>) -> () {
 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 
 // CHECK-LABEL: @tile_dyn_multiples
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @tile_dyn_multiples(%arg0 : tensor<2x3xi8>) -> () {
   // CHECK: %[[CST1:.+]] = arith.constant 1
-  // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST1]] : tensor<2x3xi8>
+  // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] : tensor<2x3xi8>
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs(%[[INIT]] : tensor<2x2x?x3xi8>)
-  // CHECK:   linalg.yield %arg1 : i8
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs(%[[INIT]] : tensor<2x2x?x3xi8>)
+  // CHECK: ^bb0(%[[ARG1:.+]]: i8,
+  // CHECK:   linalg.yield %[[ARG1]] : i8
   // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[GENERIC]] {{\[}}[0, 1, 2, 3]]
   // CHECK: tensor.expand_shape %[[COLLAPSED]] {{\[}}[0, 1]]
   %0 = "tosa.tile"(%arg0) {multiples = [2, -1]} : (tensor<2x3xi8>)  -> (tensor<2x?xi8>)
@@ -1242,6 +1289,8 @@ func.func @tile_dyn_multiples(%arg0 : tensor<2x3xi8>) -> () {
 
 // -----
 
+// CHECK-LABEL: @pad_float
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
   %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
   // TODO: Output contains multiple "arith.constant 1 : index".
@@ -1250,8 +1299,7 @@ func.func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
   // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index
   // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index
   // CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32
-  // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
-  // CHECK: ^bb0(%arg1: index, %arg2: index):
+  // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
   // CHECK:   tensor.yield [[CST]]
   // CHECK: } : tensor<1x2xf32> to tensor<4x9xf32>
   %1 = "tosa.pad"(%arg0, %0)  : (tensor<1x2xf32>, tensor<2x2xi32>)  -> (tensor<4x9xf32>)
@@ -1286,8 +1334,7 @@ func.func @pad_float_explicit(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
   // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index
   // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index
   // CHECK-DAG: [[CST:%.+]] = arith.constant 4.200000e+01 : f32
-  // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
-  // CHECK: ^bb0(%arg1: index, %arg2: index):
+  // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
   // CHECK:   tensor.yield [[CST]]
   // CHECK: } : tensor<1x2xf32> to tensor<4x9xf32>
   %1 = arith.constant dense<42.0> : tensor<f32>
@@ -1305,8 +1352,7 @@ func.func @pad_dyn_input(%arg0 : tensor<?x2xf32>) -> (tensor<?x9xf32>) {
   // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index
   // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index
   // CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32
-  // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
-  // CHECK: ^bb0(%arg1: index, %arg2: index):
+  // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
   // CHECK:   tensor.yield [[CST]]
   // CHECK: } : tensor<?x2xf32> to tensor<?x9xf32>
   %1 = "tosa.pad"(%arg0, %0)  : (tensor<?x2xf32>, tensor<2x2xi32>)  -> (tensor<?x9xf32>)
@@ -1321,8 +1367,7 @@ func.func @pad_dyn_padding(%arg0 : tensor<1x2xf32>) -> (tensor<?x9xf32>) {
   // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index
   // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index
   // CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32
-  // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
-  // CHECK: ^bb0(%arg1: index, %arg2: index):
+  // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
   // CHECK:   tensor.yield [[CST]]
   // CHECK: } : tensor<1x2xf32> to tensor<?x9xf32>
   %1 = "tosa.pad"(%arg0, %0)  : (tensor<1x2xf32>, tensor<2x2xi32>)  -> (tensor<?x9xf32>)
@@ -1344,12 +1389,13 @@ func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () {
   // CHECK: [[VAL_INIT:%.+]] = tensor.empty()
   // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648
   // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]]
-  // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>)
+  // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>)
+  // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
   // CHECK:   [[IDX:%.+]] = linalg.index 0
   // CHECK:   [[CAST:%.+]] = arith.index_cast [[IDX]]
-  // CHECK:   [[CMP:%.+]] = arith.cmpi sgt, %arg2, %arg4
-  // CHECK:   [[SELECT_VAL:%.+]] = arith.select [[CMP]], %arg2, %arg4
-  // CHECK:   [[SELECT_IDX:%.+]] = arith.select [[CMP]], [[CAST]], %arg3
+  // CHECK:   [[CMP:%.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
+  // CHECK:   [[SELECT_VAL:%.+]] = arith.select [[CMP]], %[[ARG1]], %[[ARG3]]
+  // CHECK:   [[SELECT_IDX:%.+]] = arith.select [[CMP]], [[CAST]], %[[ARG2]]
   // CHECK:   linalg.yield [[SELECT_IDX]], [[SELECT_VAL]]
   %0 = "tosa.argmax"(%arg0) { axis = 0 : i64} : (tensor<3x2xi32>)  -> (tensor<2xi32>)
 
@@ -1359,12 +1405,13 @@ func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () {
   // CHECK: [[VAL_INIT:%.+]] = tensor.empty()
   // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648
   // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]]
-  // CHECK: linalg.generic {indexing_maps = [#map0, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
+  // CHECK: linalg.generic {indexing_maps = [#map0, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
+  // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
   // CHECK:   [[IDX:%.+]] = linalg.index 1
   // CHECK:   [[CAST:%.+]] = arith.index_cast [[IDX]]
-  // CHECK:   [[CMP:%.+]] = arith.cmpi sgt, %arg2, %arg4
-  // CHECK:   [[SELECT_VAL:%.+]] = arith.select [[CMP]], %arg2, %arg4
-  // CHECK:   [[SELECT_IDX:%.+]] = arith.select [[CMP]], [[CAST]], %arg3
+  // CHECK:   [[CMP:%.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
+  // CHECK:   [[SELECT_VAL:%.+]] = arith.select [[CMP]], %[[ARG1]], %[[ARG3]]
+  // CHECK:   [[SELECT_IDX:%.+]] = arith.select [[CMP]], [[CAST]], %[[ARG2]]
   // CHECK:   linalg.yield [[SELECT_IDX]], [[SELECT_VAL]]
   %1 = "tosa.argmax"(%arg0) { axis = 1 : i64} : (tensor<3x2xi32>)  -> (tensor<3xi32>)
 
@@ -1387,19 +1434,20 @@ func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () {
 
 func.func @argmax_dyn_non_axis(%arg0 : tensor<3x?xi32>) -> () {
   // CHECK: %[[CST1:.+]] = arith.constant 1
-  // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST1]]
+  // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]]
   // CHECK: %[[IDX_INIT:.+]] = tensor.empty(%[[DYN]])
   // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32
   // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]]
   // CHECK: %[[VAL_INIT:.+]] = tensor.empty(%[[DYN]])
   // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648
   // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]]
-  // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<?xi32>, tensor<?xi32>)
+  // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<?xi32>, tensor<?xi32>)
+  // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
   // CHECK:   %[[IDX:.+]] = linalg.index 0
   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[IDX]]
-  // CHECK:   %[[CMP:.+]] = arith.cmpi sgt, %arg1, %arg3
-  // CHECK:   %[[SELECT_VAL:.+]] = arith.select %[[CMP]], %arg1, %arg3
-  // CHECK:   %[[SELECT_IDX:.+]] = arith.select %[[CMP]], %[[CAST]], %arg2
+  // CHECK:   %[[CMP:.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
+  // CHECK:   %[[SELECT_VAL:.+]] = arith.select %[[CMP]], %[[ARG1]], %[[ARG3]]
+  // CHECK:   %[[SELECT_IDX:.+]] = arith.select %[[CMP]], %[[CAST]], %[[ARG2]]
   // CHECK:   linalg.yield %[[SELECT_IDX]], %[[SELECT_VAL]]
   %0 = "tosa.argmax"(%arg0) { axis = 0 : i64} : (tensor<3x?xi32>)  -> (tensor<?xi32>)
   return
@@ -1417,12 +1465,12 @@ func.func @argmax_dyn_axis(%arg0 : tensor<3x?xi32>) -> () {
   // CHECK: %[[VAL_INIT:.+]] = tensor.empty()
   // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648
   // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]]
-  // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
+  // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
   // CHECK:   %[[IDX:.+]] = linalg.index 1
   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[IDX]]
-  // CHECK:   %[[CMP:.+]] = arith.cmpi sgt, %arg1, %arg3
-  // CHECK:   %[[SELECT_VAL:.+]] = arith.select %[[CMP]], %arg1, %arg3
-  // CHECK:   %[[SELECT_IDX:.+]] = arith.select %[[CMP]], %[[CAST]], %arg2
+  // CHECK:   %[[CMP:.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
+  // CHECK:   %[[SELECT_VAL:.+]] = arith.select %[[CMP]], %[[ARG1]], %[[ARG3]]
+  // CHECK:   %[[SELECT_IDX:.+]] = arith.select %[[CMP]], %[[CAST]], %[[ARG2]]
   // CHECK:   linalg.yield %[[SELECT_IDX]], %[[SELECT_VAL]]
   %0 = "tosa.argmax"(%arg0) { axis = 1 : i64} : (tensor<3x?xi32>)  -> (tensor<3xi32>)
   return
@@ -1431,44 +1479,54 @@ func.func @argmax_dyn_axis(%arg0 : tensor<3x?xi32>) -> () {
 // -----
 
 // CHECK-LABEL: @gather_float
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
+// CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]
 func.func @gather_float(%arg0: tensor<2x3x2xf32>, %arg1: tensor<2x3xi32>) -> () {
   // CHECK: %[[INIT:.+]] = tensor.empty()
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>)
-  // CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: f32)
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>)
+  // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32)
   // CHECK:   %[[IDX0:.+]] = linalg.index 0
-  // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG0]]
+  // CHECK:   %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
   // CHECK:   %[[IDX2:.+]] = linalg.index 2
-  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %arg0[%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<2x3x2xf32>
+  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<2x3x2xf32>
   // CHECK:   linalg.yield %[[EXTRACT]]
   %0 = "tosa.gather"(%arg0, %arg1)  : (tensor<2x3x2xf32>, tensor<2x3xi32>)  -> (tensor<2x3x2xf32>)
   return
 }
 
+// -----
+
 // CHECK-LABEL: @gather_float_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
+// CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]
 func.func @gather_float_dyn(%arg0: tensor<?x3x2xf32>, %arg1: tensor<?x3xi32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
+  // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<?x3xi32>) outs(%[[INIT]] : tensor<?x3x2xf32>)
-  // CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: f32)
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<?x3xi32>) outs(%[[INIT]] : tensor<?x3x2xf32>)
+  // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32)
   // CHECK:   %[[IDX0:.+]] = linalg.index 0
-  // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG0]]
+  // CHECK:   %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
   // CHECK:   %[[IDX2:.+]] = linalg.index 2
-  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %arg0[%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<?x3x2xf32>
+  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<?x3x2xf32>
   // CHECK:   linalg.yield %[[EXTRACT]]
   %0 = "tosa.gather"(%arg0, %arg1)  : (tensor<?x3x2xf32>, tensor<?x3xi32>)  -> (tensor<?x3x2xf32>)
   return
 }
 
+// -----
+
 // CHECK-LABEL: @gather_int
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
+// CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]
 func.func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () {
   // CHECK: %[[INIT:.+]] = tensor.empty()
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>)
-  // CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: i32)
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>)
+  // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: i32)
   // CHECK:   %[[IDX0:.+]] = linalg.index 0
-  // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG0]]
+  // CHECK:   %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
   // CHECK:   %[[IDX2:.+]] = linalg.index 2
-  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %arg0[%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<2x3x2xi32>
+  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<2x3x2xi32>
   // CHECK:   linalg.yield %[[EXTRACT]]
   %0 = "tosa.gather"(%arg0, %arg1)  : (tensor<2x3x2xi32>, tensor<2x3xi32>)  -> (tensor<2x3x2xi32>)
   return
@@ -1477,14 +1535,16 @@ func.func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () {
 // -----
 
 // CHECK-LABEL: @table8
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]:
 func.func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () {
   // CHECK: %[[INIT:.+]] = tensor.empty()
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
   // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
   // CHECK:   %[[OFFSET:.+]] = arith.constant 128
   // CHECK:   %[[ADD:.+]] = arith.addi %[[CAST]], %[[OFFSET]]
-  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %arg1[%[[ADD]]]
+  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG1]][%[[ADD]]]
   // CHECK:   linalg.yield %[[EXTRACT]]
   %0 = "tosa.table"(%arg0, %arg1)  : (tensor<6xi8>, tensor<512xi8>)  -> (tensor<6xi8>)
   return
@@ -1493,11 +1553,13 @@ func.func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () {
 // -----
 
 // CHECK-LABEL: @table16
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]:
 func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () {
   // CHECK: %[[INIT:.+]] = tensor.empty()
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>)
-  // CHECK: ^bb0(%arg2: i16, %arg3: i32)
-  // CHECK: %[[EXT_IN:.+]] = arith.extsi %arg2
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>)
+  // CHECK: ^bb0(%[[ARG2:.*]]: i16, %[[ARG3:.*]]: i32)
+  // CHECK: %[[EXT_IN:.+]] = arith.extsi %[[ARG2]]
   // CHECK: %[[C32768:.+]] = arith.constant 32768
   // CHECK: %[[C7:.+]] = arith.constant 7
   // CHECK: %[[C1:.+]] = arith.constant 1
@@ -1508,8 +1570,8 @@ func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () {
   // CHECK: %[[IDXPLUS1:.+]] = arith.addi %[[IDX]], %[[C1]]
   // CHECK: %[[IDX_CAST:.+]] = arith.index_cast %[[IDX]]
   // CHECK: %[[IDXPLUS1_CAST:.+]] = arith.index_cast %[[IDXPLUS1]]
-  // CHECK: %[[BASE:.+]] = tensor.extract %arg1[%[[IDX_CAST]]]
-  // CHECK: %[[NEXT:.+]] = tensor.extract %arg1[%[[IDXPLUS1_CAST]]]
+  // CHECK: %[[BASE:.+]] = tensor.extract %[[ARG1]][%[[IDX_CAST]]]
+  // CHECK: %[[NEXT:.+]] = tensor.extract %[[ARG1]][%[[IDXPLUS1_CAST]]]
   // CHECK: %[[BASE_EXT:.+]] = arith.extsi %[[BASE]]
   // CHECK: %[[NEXT_EXT:.+]] = arith.extsi %[[NEXT]]
   // CHECK: %[[BASE_MUL:.+]] = arith.shli %[[BASE_EXT]], %[[C7]]
@@ -1524,16 +1586,18 @@ func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () {
 // -----
 
 // CHECK-LABEL: @table8_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]:
 func.func @table8_dyn(%arg0: tensor<?xi8>, %arg1: tensor<512xi8>) -> () {
   // CHECK: %[[CST0:.+]] = arith.constant 0
-  // CHECK: %[[DYN:.+]] = tensor.dim %arg0, %[[CST0]]
+  // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<?xi8>) outs(%[[INIT]] : tensor<?xi8>)
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<?xi8>) outs(%[[INIT]] : tensor<?xi8>)
   // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
   // CHECK:   %[[OFFSET:.+]] = arith.constant 128
   // CHECK:   %[[ADD:.+]] = arith.addi %[[CAST]], %[[OFFSET]]
-  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %arg1[%[[ADD]]]
+  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG1]][%[[ADD]]]
   // CHECK:   linalg.yield %[[EXTRACT]]
   %0 = "tosa.table"(%arg0, %arg1)  : (tensor<?xi8>, tensor<512xi8>)  -> (tensor<?xi8>)
   return
@@ -1542,14 +1606,16 @@ func.func @table8_dyn(%arg0: tensor<?xi8>, %arg1: tensor<512xi8>) -> () {
 // -----
 
 // CHECK-LABEL: @table8_dyn_table
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]:
 func.func @table8_dyn_table(%arg0: tensor<6xi8>, %arg1: tensor<?xi8>) -> () {
   // CHECK: %[[INIT:.+]] = tensor.empty()
-  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
+  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
   // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
   // CHECK:   %[[OFFSET:.+]] = arith.constant 128
   // CHECK:   %[[ADD:.+]] = arith.addi %[[CAST]], %[[OFFSET]]
-  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %arg1[%[[ADD]]]
+  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG1]][%[[ADD]]]
   // CHECK:   linalg.yield %[[EXTRACT]]
   %0 = "tosa.table"(%arg0, %arg1)  : (tensor<6xi8>, tensor<?xi8>)  -> (tensor<6xi8>)
   return
@@ -1627,6 +1693,7 @@ func.func @resize_nearest(%input: tensor<1x2x2x1xf32>) -> () {
 // -----
 
 // CHECK-LABEL: @resize_bilinear
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @resize_bilinear(%input: tensor<1x2x2x1xf32>) -> () {
   // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic
@@ -1682,10 +1749,10 @@ func.func @resize_bilinear(%input: tensor<1x2x2x1xf32>) -> () {
   // CHECK: %[[XLOI:.+]] = arith.index_cast %[[XLO]]
   // CHECK: %[[XHII:.+]] = arith.index_cast %[[XHI]]
 
-  // CHECK: %[[LOLO:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YLOI]], %[[XLOI]], %[[IDX3]]]
-  // CHECK: %[[LOHI:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YLOI]], %[[XHII]], %[[IDX3]]]
-  // CHECK: %[[HILO:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YHII]], %[[XLOI]], %[[IDX3]]]
-  // CHECK: %[[HIHI:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YHII]], %[[XHII]], %[[IDX3]]]
+  // CHECK: %[[LOLO:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[YLOI]], %[[XLOI]], %[[IDX3]]]
+  // CHECK: %[[LOHI:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[YLOI]], %[[XHII]], %[[IDX3]]]
+  // CHECK: %[[HILO:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[YHII]], %[[XLOI]], %[[IDX3]]]
+  // CHECK: %[[HIHI:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[YHII]], %[[XHII]], %[[IDX3]]]
 
   // Compute the bilinear interpolation.
 
@@ -1709,6 +1776,7 @@ func.func @resize_bilinear(%input: tensor<1x2x2x1xf32>) -> () {
 // -----
 
 // CHECK-LABEL: @resize_nearest_int
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @resize_nearest_int(%input: tensor<1x2x2x1xi32>) -> () {
   // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic
@@ -1768,7 +1836,7 @@ func.func @resize_nearest_int(%input: tensor<1x2x2x1xi32>) -> () {
 
   // CHECK-DAG: %[[IDY:.+]] = arith.index_cast %[[VAL25]]
   // CHECK-DAG: %[[IDX:.+]] = arith.index_cast %[[VAL29]]
-  // CHECK: %[[EXTRACT:.+]] = tensor.extract %arg0[%[[IDX0]], %[[IDY]], %[[IDX]], %[[IDX3]]]
+  // CHECK: %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[IDY]], %[[IDX]], %[[IDX3]]]
   // CHECK: linalg.yield %[[EXTRACT]]
   %output = "tosa.resize"(%input) { output_size = [4, 4], stride = [128, 128], offset = [1, 2], stride_fp = [0. : f32, 0. : f32], offset_fp = [0. : f32, 0. : f32], shift = 8 : i32, mode = "NEAREST_NEIGHBOR" } : (tensor<1x2x2x1xi32>)  -> (tensor<1x4x4x1xi32>)
   return
@@ -1777,6 +1845,7 @@ func.func @resize_nearest_int(%input: tensor<1x2x2x1xi32>) -> () {
 // -----
 
 // CHECK-LABEL: @resize_bilinear_int
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @resize_bilinear_int(%input: tensor<1x2x2x1xi8>) -> () {
   // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[GENERIC:.+]] = linalg.generic
@@ -1830,10 +1899,10 @@ func.func @resize_bilinear_int(%input: tensor<1x2x2x1xi8>) -> () {
   // CHECK: %[[XLOI:.+]] = arith.index_cast %[[XLO]]
   // CHECK: %[[XHII:.+]] = arith.index_cast %[[XHI]]
 
-  // CHECK: %[[LOLO:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YLOI]], %[[XLOI]], %[[IDX3]]]
-  // CHECK: %[[LOHI:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YLOI]], %[[XHII]], %[[IDX3]]]
-  // CHECK: %[[HILO:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YHII]], %[[XLOI]], %[[IDX3]]]
-  // CHECK: %[[HIHI:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YHII]], %[[XHII]], %[[IDX3]]]
+  // CHECK: %[[LOLO:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[YLOI]], %[[XLOI]], %[[IDX3]]]
+  // CHECK: %[[LOHI:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[YLOI]], %[[XHII]], %[[IDX3]]]
+  // CHECK: %[[HILO:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[YHII]], %[[XLOI]], %[[IDX3]]]
+  // CHECK: %[[HIHI:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[YHII]], %[[XHII]], %[[IDX3]]]
 
   // CHECK: %[[XLOLO:.+]] = arith.extsi %[[LOLO]]
   // CHECK: %[[XLOHI:.+]] = arith.extsi %[[LOHI]]
@@ -1862,9 +1931,10 @@ func.func @resize_bilinear_int(%input: tensor<1x2x2x1xi8>) -> () {
 // -----
 
 // CHECK-LABEL: @resize_dyn
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 func.func @resize_dyn(%input: tensor<?x2x2x1xi8>) -> () {
     // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
+  // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[GENERIC:.+]] = linalg.generic
   %output = "tosa.resize"(%input) { output_size = [4, 4], stride = [128, 128], offset = [1, 2], stride_fp = [0. : f32, 0. : f32], offset_fp = [0. : f32, 0. : f32], shift = 8 : i32, mode = "BILINEAR" } : (tensor<?x2x2x1xi8>)  -> (tensor<?x4x4x1xi32>)
diff --git a/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir b/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir
index 5ca63d9..108d870 100644
--- a/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir
@@ -239,21 +239,21 @@ func.func @multiple_redundant_args(%arg0 : tensor<?x?xi32>, %arg1 : tensor<?xi32
 //  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0)>
 //  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
 //      CHECK: func @multiple_redundant_args(
-// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xi32>
-// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?xi32>
-// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?xi32>
-// CHECK-SAME:     %[[ARG3:[a-zA-Z0-9]+]]: tensor<?x?xi32>
-// CHECK-SAME:     %[[ARG4:[a-zA-Z0-9]+]]: tensor<?xi32>)
+// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xi32>
+// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xi32>
+// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xi32>
+// CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?xi32>
+// CHECK-SAME:     %[[ARG4:[a-zA-Z0-9_]+]]: tensor<?xi32>)
 //      CHECK:   %[[RETURN:.+]] = linalg.generic
 // CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]]
 // CHECK-SAME:       iterator_types = ["parallel", "reduction"]
 // CHECK-SAME:       ins(%[[ARG4]], %[[ARG0]], %[[ARG1]], %[[ARG3]] :
 // CHECK-SAME:       outs(%[[ARG2]] :
 //      CHECK:   ^{{.+}}(%[[B0:[a-zA-Z0-9]+]]: i32
-// CHECK-SAME:       %[[B1:[a-zA-Z0-9]+]]: i32
-// CHECK-SAME:       %[[B2:[a-zA-Z0-9]+]]: i32
-// CHECK-SAME:       %[[B3:[a-zA-Z0-9]+]]: i32
-// CHECK-SAME:       %[[B4:[a-zA-Z0-9]+]]: i32)
+// CHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: i32
+// CHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: i32
+// CHECK-SAME:       %[[B3:[a-zA-Z0-9_]+]]: i32
+// CHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: i32)
 //      CHECK:     %[[T0:.+]] = arith.addi %[[B0]], %[[B1]]
 //      CHECK:     %[[T1:.+]] = arith.addi %[[T0]], %[[B1]]
 //      CHECK:     %[[T2:.+]] = arith.addi %[[T1]], %[[B2]]
diff --git a/mlir/test/Dialect/Linalg/decompose-ops.mlir b/mlir/test/Dialect/Linalg/decompose-ops.mlir
index 3eed6d2..b562715 100644
--- a/mlir/test/Dialect/Linalg/decompose-ops.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-ops.mlir
@@ -28,9 +28,9 @@ func.func @simple_op(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?xf32>, %arg2 : ten
 //  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1)>
 //  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
 //      CHECK: func @simple_op(
-// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
-// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?xf32>
-// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?xf32>
+// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
+// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xf32>
 //  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
@@ -43,26 +43,26 @@ func.func @simple_op(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?xf32>, %arg2 : ten
 // CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] :
 // CHECK-SAME:       outs(%[[INIT1]], %[[INIT2]], %[[INIT1]] :
 // CHECK-NEXT:   ^bb0(
-// CHECK-SAME:       %[[B0:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B1:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B2:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B3:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B4:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B5:[a-zA-Z0-9]+]]: f32):
+// CHECK-SAME:       %[[B0:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B3:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B5:[a-zA-Z0-9_]+]]: f32):
 // CHECK-NEXT:     %[[S0:.+]] = arith.addf %[[B0]], %[[B1]]
-// CHECK-NEXT:     linalg.yield %[[S0]], %{{[a-zA-Z0-9]+}}, %[[S0]]
+// CHECK-NEXT:     linalg.yield %[[S0]], %{{[a-zA-Z0-9_]+}}, %[[S0]]
 //      CHECK:   %[[GENERIC2:.+]]:2 = linalg.generic
 // CHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]]]
 // CHECK-SAME:       ["parallel", "parallel"]
 // CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#2 :
 // CHECK-SAME:       outs(%[[INIT1]], %[[INIT2]] :
 // CHECK-NEXT:   ^bb0(
-// CHECK-SAME:       %[[B6:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B7:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B8:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B9:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B10:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B11:[a-zA-Z0-9]+]]: f32):
+// CHECK-SAME:       %[[B6:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B7:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B8:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B9:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B10:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B11:[a-zA-Z0-9_]+]]: f32):
 // CHECK-NEXT:     %[[S1:.+]] = arith.mulf %[[B9]], %[[B8]]
 // CHECK-NEXT:     linalg.yield %[[B9]], %[[S1]]
 //      CHECK:   return %[[GENERIC1]]#0, %[[GENERIC2]]#1
@@ -74,9 +74,9 @@ func.func @simple_op(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?xf32>, %arg2 : ten
 //  CANONICALIZECHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1, d0)>
 //  CANONICALIZECHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1)>
 //      CANONICALIZECHECK: func @simple_op(
-// CANONICALIZECHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
-// CANONICALIZECHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?xf32>
-// CANONICALIZECHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?xf32>
+// CANONICALIZECHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CANONICALIZECHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
+// CANONICALIZECHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xf32>
 //  CANONICALIZECHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
 //  CANONICALIZECHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CANONICALIZECHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
@@ -89,9 +89,9 @@ func.func @simple_op(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?xf32>, %arg2 : ten
 // CANONICALIZECHECK-SAME:       ins(%[[ARG0]], %[[ARG1]] :
 // CANONICALIZECHECK-SAME:       outs(%[[INIT1]] :
 // CANONICALIZECHECK-NEXT:   ^bb0(
-// CANONICALIZECHECK-SAME:       %[[B0:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[B1:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[B2:[a-zA-Z0-9]+]]: f32):
+// CANONICALIZECHECK-SAME:       %[[B0:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: f32):
 // CANONICALIZECHECK-NEXT:     %[[S0:.+]] = arith.addf %[[B0]], %[[B1]]
 // CANONICALIZECHECK-NEXT:     linalg.yield %[[S0]]
 //      CANONICALIZECHECK:   %[[GENERIC2:.+]] = linalg.generic
@@ -100,9 +100,9 @@ func.func @simple_op(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?xf32>, %arg2 : ten
 // CANONICALIZECHECK-SAME:       ins(%[[ARG2]], %[[GENERIC1]] :
 // CANONICALIZECHECK-SAME:       outs(%[[INIT2]] :
 // CANONICALIZECHECK-NEXT:   ^bb0(
-// CANONICALIZECHECK-SAME:       %[[B3:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[B4:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[B5:[a-zA-Z0-9]+]]: f32):
+// CANONICALIZECHECK-SAME:       %[[B3:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[B5:[a-zA-Z0-9_]+]]: f32):
 // CANONICALIZECHECK-NEXT:     %[[S1:.+]] = arith.mulf %[[B4]], %[[B3]]
 // CANONICALIZECHECK-NEXT:     linalg.yield %[[S1]]
 //      CANONICALIZECHECK:   return %[[GENERIC1]], %[[GENERIC2]]
@@ -137,9 +137,9 @@ func.func @simple_op_permuted_outputs(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x
 //  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1)>
 //  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
 //      CHECK: func @simple_op_permuted_outputs(
-// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
-// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?xf32>
-// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?xf32>
+// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
+// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xf32>
 //  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
@@ -152,27 +152,27 @@ func.func @simple_op_permuted_outputs(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x
 // CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] :
 // CHECK-SAME:       outs(%[[INIT1]], %[[INIT2]], %[[INIT2]], %[[INIT1]] :
 // CHECK-NEXT:   ^bb0(
-// CHECK-SAME:       %[[B0:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B1:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B2:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B3:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B4:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B5:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B6:[a-zA-Z0-9]+]]: f32):
+// CHECK-SAME:       %[[B0:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B3:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B5:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B6:[a-zA-Z0-9_]+]]: f32):
 // CHECK-NEXT:     %[[S0:.+]] = arith.addf %[[B0]], %[[B1]]
-// CHECK-NEXT:     linalg.yield %[[S0]], %{{[a-zA-Z0-9]+}}, %[[S0]]
+// CHECK-NEXT:     linalg.yield %[[S0]], %{{[a-zA-Z0-9_]+}}, %[[S0]]
 //      CHECK:   %[[GENERIC2:.+]]:3 = linalg.generic
 // CHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]], #[[MAP0]]]
 // CHECK-SAME:       ["parallel", "parallel"]
 // CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#3 :
 // CHECK-SAME:       outs(%[[INIT1]], %[[INIT2]], %[[INIT2]] :
 // CHECK-NEXT:   ^bb0(
-// CHECK-SAME:       %[[B7:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B8:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B9:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B10:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B11:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[B12:[a-zA-Z0-9]+]]: f32):
+// CHECK-SAME:       %[[B7:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B8:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B9:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B10:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B11:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[B12:[a-zA-Z0-9_]+]]: f32):
 // CHECK-NEXT:     %[[S1:.+]] = arith.mulf %[[B10]], %[[B9]]
 // CHECK-NEXT:     linalg.yield %[[B10]], %[[S1]], %[[B10]]
 //      CHECK:   return %[[GENERIC1]]#0, %[[GENERIC2]]#1, %[[GENERIC1]]#2
@@ -182,9 +182,9 @@ func.func @simple_op_permuted_outputs(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x
 //  CANONICALIZECHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1, d0)>
 //  CANONICALIZECHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1)>
 //      CANONICALIZECHECK: func @simple_op_permuted_outputs(
-// CANONICALIZECHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
-// CANONICALIZECHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?xf32>
-// CANONICALIZECHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?xf32>
+// CANONICALIZECHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CANONICALIZECHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
+// CANONICALIZECHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xf32>
 //  CANONICALIZECHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
 //  CANONICALIZECHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //  CANONICALIZECHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
@@ -197,9 +197,9 @@ func.func @simple_op_permuted_outputs(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x
 // CANONICALIZECHECK-SAME:       ins(%[[ARG0]], %[[ARG1]] :
 // CANONICALIZECHECK-SAME:       outs(%[[INIT1]], %[[INIT2]] :
 // CANONICALIZECHECK-NEXT:   ^bb0(
-// CANONICALIZECHECK-SAME:       %[[B0:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[B1:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[B2:[a-zA-Z0-9]+]]: f32):
+// CANONICALIZECHECK-SAME:       %[[B0:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: f32):
 // CANONICALIZECHECK-NEXT:     %[[S0:.+]] = arith.addf %[[B0]], %[[B1]]
 // CANONICALIZECHECK-NEXT:     linalg.yield %[[S0]], %[[S0]]
 //      CANONICALIZECHECK:   %[[GENERIC2:.+]] = linalg.generic
@@ -208,9 +208,9 @@ func.func @simple_op_permuted_outputs(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x
 // CANONICALIZECHECK-SAME:       ins(%[[ARG2]], %[[GENERIC1]]#0 :
 // CANONICALIZECHECK-SAME:       outs(%[[INIT2]] :
 // CANONICALIZECHECK-NEXT:   ^bb0(
-// CANONICALIZECHECK-SAME:       %[[B4:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[B5:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[B6:[a-zA-Z0-9]+]]: f32):
+// CANONICALIZECHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[B5:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[B6:[a-zA-Z0-9_]+]]: f32):
 // CANONICALIZECHECK-NEXT:     %[[S1:.+]] = arith.mulf %[[B5]], %[[B4]]
 // CANONICALIZECHECK-NEXT:     linalg.yield %[[S1]]
 //      CANONICALIZECHECK:   return %[[GENERIC1]]#0, %[[GENERIC2]], %[[GENERIC1]]#1
@@ -252,7 +252,7 @@ func.func @multi_statement(%arg0 : tensor<10x20xf32>, %arg1 : tensor<10xi32>) ->
 // CHECK-NEXT:     ^bb0(
 // CHECK-SAME:         %[[B0:.+]]: f32
 // CHECK-SAME:         %[[B1:.+]]: i32
-// CHECK-SAME:         %[[B2:[a-zA-Z0-9]+]]: f64
+// CHECK-SAME:         %[[B2:[a-zA-Z0-9_]+]]: f64
 // CHECK-SAME:         %[[B3:.+]]: f64
 // CHECK-NEXT:       %[[S0:.+]] = arith.sitofp %[[B1]] : i32 to f64
 // CHECK-NEXT:       linalg.yield %{{.+}}, %[[S0]]
@@ -264,8 +264,8 @@ func.func @multi_statement(%arg0 : tensor<10x20xf32>, %arg1 : tensor<10xi32>) ->
 // CHECK-NEXT:     ^bb0(
 // CHECK-SAME:         %[[B4:.+]]: f32
 // CHECK-SAME:         %[[B5:.+]]: i32
-// CHECK-SAME:         %[[B6:[a-zA-Z0-9]+]]: f64
-// CHECK-SAME:         %[[B7:[a-zA-Z0-9]+]]: f64
+// CHECK-SAME:         %[[B6:[a-zA-Z0-9_]+]]: f64
+// CHECK-SAME:         %[[B7:[a-zA-Z0-9_]+]]: f64
 // CHECK-SAME:         %[[B8:.+]]: f64
 // CHECK-NEXT:       %[[S1:.+]] = arith.extf %[[B4]] : f32 to f64
 // CHECK-NEXT:       linalg.yield %{{.+}}, %[[S1]]
@@ -277,8 +277,8 @@ func.func @multi_statement(%arg0 : tensor<10x20xf32>, %arg1 : tensor<10xi32>) ->
 // CHECK-NEXT:     ^bb0(
 // CHECK-SAME:         %[[B9:.+]]: f32
 // CHECK-SAME:         %[[B10:.+]]: i32
-// CHECK-SAME:         %[[B11:[a-zA-Z0-9]+]]: f64
-// CHECK-SAME:         %[[B12:[a-zA-Z0-9]+]]: f64
+// CHECK-SAME:         %[[B11:[a-zA-Z0-9_]+]]: f64
+// CHECK-SAME:         %[[B12:[a-zA-Z0-9_]+]]: f64
 // CHECK-SAME:         %[[B13:.+]]: f64
 // CHECK-NEXT:       %[[S2:.+]] = arith.addf %[[B11]], %[[B12]] : f64
 // CHECK-NEXT:       linalg.yield %[[S2]]
@@ -318,8 +318,8 @@ func.func @multi_statement(%arg0 : tensor<10x20xf32>, %arg1 : tensor<10xi32>) ->
 // CANONICALIZECHECK-SAME:       ins(%[[GENERIC0]], %[[GENERIC1]] :
 // CANONICALIZECHECK-SAME:       outs(%[[INIT0]] :
 // CANONICALIZECHECK-NEXT:     ^bb0(
-// CANONICALIZECHECK-SAME:         %[[B4:[a-zA-Z0-9]+]]: f64
-// CANONICALIZECHECK-SAME:         %[[B5:[a-zA-Z0-9]+]]: f64
+// CANONICALIZECHECK-SAME:         %[[B4:[a-zA-Z0-9_]+]]: f64
+// CANONICALIZECHECK-SAME:         %[[B5:[a-zA-Z0-9_]+]]: f64
 // CANONICALIZECHECK-SAME:         %[[B6:.+]]: f64
 // CANONICALIZECHECK-NEXT:       %[[S2:.+]] = arith.addf %[[B4]], %[[B5]] : f64
 // CANONICALIZECHECK-NEXT:       linalg.yield %[[S2]]
@@ -352,21 +352,21 @@ func.func @destination_passing_style(
 //  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 //  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
 //      CHECK: func.func @destination_passing_style(
-// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?xf32>
-// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?xf32>
-// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?xf32>
-// CHECK-SAME:     %[[ARG3:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
+// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?xf32>
+// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
+// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?xf32>)
 //      CHECK:   %[[GENERIC1:.+]]:3 = linalg.generic
 // CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]]
 // CHECK-SAME:       iterator_types = ["parallel", "parallel"]
 // CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]] :
 // CHECK-SAME:       outs(%[[ARG2]], %[[ARG3]], %[[ARG2]] :
 // CHECK-NEXT:   ^bb0(
-// CHECK-SAME:       %[[ARG4:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[ARG5:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[ARG6:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[ARG7:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[ARG8:[a-zA-Z0-9]+]]: f32
+// CHECK-SAME:       %[[ARG4:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[ARG5:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[ARG6:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[ARG7:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[ARG8:[a-zA-Z0-9_]+]]: f32
 // CHECK-NEXT:     %[[S1:.+]] = arith.addf %[[ARG4]], %[[ARG6]]
 // CHECK-NEXT:     linalg.yield %[[S1]], %{{.+}}, %[[S1]]
 //      CHECK:   %[[GENERIC2:.+]]:2 = linalg.generic
@@ -375,13 +375,13 @@ func.func @destination_passing_style(
 // CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[GENERIC1]]#2 :
 // CHECK-SAME:       outs(%[[ARG2]], %[[ARG3]] :
 // CHECK-NEXT:   ^bb0(
-// CHECK-SAME:       %[[ARG9:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[ARG10:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[ARG11:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[ARG12:[a-zA-Z0-9]+]]: f32
-// CHECK-SAME:       %[[ARG13:[a-zA-Z0-9]+]]: f32
+// CHECK-SAME:       %[[ARG9:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[ARG10:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[ARG11:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[ARG12:[a-zA-Z0-9_]+]]: f32
+// CHECK-SAME:       %[[ARG13:[a-zA-Z0-9_]+]]: f32
 // CHECK-NEXT:     %[[S2:.+]] = arith.mulf %[[ARG10]], %[[ARG12]]
-// CHECK-NEXT:     linalg.yield %[[ARG6]], %[[S2]]
+// CHECK-NEXT:     linalg.yield %[[ARG11]], %[[S2]]
 //      CHECK:   return %[[GENERIC1]]#0, %[[GENERIC2]]#1
 
 //  CANONICALIZECHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0)>
@@ -389,18 +389,18 @@ func.func @destination_passing_style(
 //  CANONICALIZECHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1)>
 //  CANONICALIZECHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
 //      CANONICALIZECHECK: func.func @destination_passing_style(
-// CANONICALIZECHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?xf32>
-// CANONICALIZECHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?xf32>
-// CANONICALIZECHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?xf32>
-// CANONICALIZECHECK-SAME:     %[[ARG3:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
+// CANONICALIZECHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?xf32>
+// CANONICALIZECHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
+// CANONICALIZECHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CANONICALIZECHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?xf32>)
 //      CANONICALIZECHECK:   %[[GENERIC1:.+]] = linalg.generic
 // CANONICALIZECHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]]]
 // CANONICALIZECHECK-SAME:       iterator_types = ["parallel", "parallel"]
 // CANONICALIZECHECK-SAME:       ins(%[[ARG0]] :
 // CANONICALIZECHECK-SAME:       outs(%[[ARG2]] :
 // CANONICALIZECHECK-NEXT:   ^bb0(
-// CANONICALIZECHECK-SAME:       %[[ARG4:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[ARG5:[a-zA-Z0-9]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[ARG4:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[ARG5:[a-zA-Z0-9_]+]]: f32
 // CANONICALIZECHECK-NEXT:     %[[S1:.+]] = arith.addf %[[ARG4]], %[[ARG5]]
 // CANONICALIZECHECK-NEXT:     linalg.yield %[[S1]]
 //      CANONICALIZECHECK:   %[[GENERIC2:.+]]:2 = linalg.generic
@@ -409,10 +409,10 @@ func.func @destination_passing_style(
 // CANONICALIZECHECK-SAME:       ins(%[[ARG1]], %[[GENERIC1]] :
 // CANONICALIZECHECK-SAME:       outs(%[[ARG2]], %[[ARG3]] :
 // CANONICALIZECHECK-NEXT:   ^bb0(
-// CANONICALIZECHECK-SAME:       %[[ARG4:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[ARG5:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[ARG6:[a-zA-Z0-9]+]]: f32
-// CANONICALIZECHECK-SAME:       %[[ARG7:[a-zA-Z0-9]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[ARG4:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[ARG5:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[ARG6:[a-zA-Z0-9_]+]]: f32
+// CANONICALIZECHECK-SAME:       %[[ARG7:[a-zA-Z0-9_]+]]: f32
 // CANONICALIZECHECK-NEXT:     %[[S2:.+]] = arith.mulf %[[ARG4]], %[[ARG6]]
 // CANONICALIZECHECK-NEXT:     linalg.yield %[[ARG5]], %[[S2]]
 //      CANONICALIZECHECK:   return %[[GENERIC1]], %[[GENERIC2]]#1
diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
index cf69f04..ca142e3 100644
--- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
@@ -663,7 +663,7 @@ func.func @generic_index_op2(%arg0: tensor<1x8xf64>, %arg1: tensor<1x8xi32>) ->
   } -> tensor<1x8xf64>
 
   // CHECK-NEXT:   %[[R:.*]]:2 = linalg.generic
-  //      CHECK:     bb0(%[[BBA:[0-9a-z]*]]: f64, %[[BBB:[0-9a-z]*]]: i32):
+  //      CHECK:     bb0(%[[BBA:[0-9a-zA-Z_]*]]: f64, %[[BBB:[0-9a-zA-Z_]*]]: i32):
   // CHECK-NEXT:       %[[A:.*]] = func.call @compute1(%[[BBA]]) : (f64) -> f64
   // CHECK-NEXT:       %[[B:.*]] = func.call @compute2(%[[A]], %[[BBB]]) : (f64, i32) -> i32
   // CHECK-NEXT:       linalg.yield %[[A]], %[[B]] : f64, i32
@@ -1071,15 +1071,15 @@ module {
   }
 }
 // CHECK-LABEL: func.func @fuse_multi_result_producer
-//  CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<f32>
-//  CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<f32>
+//  CHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<f32>
+//  CHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<f32>
 //       CHECK:   %[[INIT:.+]] = tensor.empty
 //       CHECK:   %[[GENERIC:.+]] = linalg.generic
 //  CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]] :
 //  CHECK-SAME:       outs(%[[INIT]] :
 //  CHECK-NEXT:     ^bb0
-//  CHECK-SAME:         %[[B0:[a-zA-Z0-9]+]]: f32
-//  CHECK-SAME:         %[[B1:[a-zA-Z0-9]+]]: f32
+//  CHECK-SAME:         %[[B0:[a-zA-Z0-9_]+]]: f32
+//  CHECK-SAME:         %[[B1:[a-zA-Z0-9_]+]]: f32
 //   CHECK-DAG:     %[[T0:.+]] = arith.addf %[[B0]], %[[B1]]
 //   CHECK-DAG:     %[[T1:.+]] = arith.addf %[[T0]], %[[B1]]
 //   CHECK-DAG:     %[[T2:.+]] = arith.addf %[[T1]], %[[B1]]
diff --git a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
index b98086a..3349af6 100644
--- a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
@@ -57,7 +57,7 @@ func.func @pad_tensor_detailed(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1
 // CHECK:      %[[R2c:.+]] = linalg.generic
 // CHECK-SAME:   indexing_maps = [#[[$MAP4]], #[[$MAP5]]]
 // CHECK-SAME:   iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-// CHECK:        ins(%arg0 : tensor<1x28x28x1xf32>) outs(%1 : tensor<1x32x32x1xf32>)
-// CHECK:      ^bb0(%[[VAL:.+]]: f32, %arg2: f32)
+// CHECK:        ins(%{{.*}} : tensor<1x28x28x1xf32>) outs(%{{.*}} : tensor<1x32x32x1xf32>)
+// CHECK:      ^bb0(%[[VAL:.+]]: f32, %{{.*}}: f32)
 // CHECK:        linalg.yield %[[VAL]] : f32
 // CHECK:      return %[[R2c:.+]]
diff --git a/mlir/test/Dialect/Linalg/reshape_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_fusion.mlir
index 5c4be8a..2506eee 100644
--- a/mlir/test/Dialect/Linalg/reshape_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_fusion.mlir
@@ -212,8 +212,8 @@ func.func @indexed_consumer_reshape_producer_fusion(%arg0 : tensor<?x?x4x?xi32>,
 //       CHECK: func @indexed_consumer_reshape_producer_fusion
 //       CHECK:   linalg.generic
 //       CHECK:   ^{{.*}}(
-//  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9]+]]: i32, %[[ARG4:[a-zA-Z0-9]+]]: i32,
-//  CHECK-SAME:     %[[ARG8:[a-zA-Z0-9]+]]: i32)
+//  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]]: i32, %[[ARG4:[a-zA-Z0-9_]+]]: i32,
+//  CHECK-SAME:     %[[ARG8:[a-zA-Z0-9_]+]]: i32)
 //   CHECK-DAG:     %[[IDX0:.+]] = linalg.index 0 : index
 //   CHECK-DAG:     %[[IDX1:.+]] = linalg.index 1 : index
 //   CHECK-DAG:     %[[IDX2:.+]] = linalg.index 2 : index
@@ -261,8 +261,8 @@ func.func @indexed_producer_reshape_consumer_fusion(%arg0 : tensor<?x?xi32>,
 //       CHECK: func @indexed_producer_reshape_consumer_fusion
 //       CHECK:   linalg.generic
 //       CHECK:   ^{{.*}}(
-//  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9]+]]: i32, %[[ARG4:[a-zA-Z0-9]+]]: i32,
-//  CHECK-SAME:     %[[ARG5:[a-zA-Z0-9]+]]: i32)
+//  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]]: i32, %[[ARG4:[a-zA-Z0-9_]+]]: i32,
+//  CHECK-SAME:     %[[ARG5:[a-zA-Z0-9_]+]]: i32)
 //   CHECK-DAG:     %[[IDX0:.+]] = linalg.index 0 : index
 //   CHECK-DAG:     %[[IDX1:.+]] = linalg.index 1 : index
 //   CHECK-DAG:     %[[IDX2:.+]] = linalg.index 2 : index
@@ -331,8 +331,8 @@ func.func @reshape_as_consumer_permutation
 //  CHECK-SAME:     ins(%[[T1]], %[[T2]] : tensor<5x6x7x2x3x4xi32>, tensor<5x6x7x4xi32>)
 //  CHECK-SAME:     outs(%[[T3]] : tensor<2x3x4x5x6x7xi32>)
 //       CHECK:   ^{{.+}}(
-//  CHECK-SAME:     %[[ARG8:[a-zA-Z0-9]+]]: i32, %[[ARG9:[a-zA-Z0-9]+]]: i32,
-//  CHECK-SAME:     %[[ARG10:[a-zA-Z0-9]+]]: i32)
+//  CHECK-SAME:     %[[ARG8:[a-zA-Z0-9_]+]]: i32, %[[ARG9:[a-zA-Z0-9_]+]]: i32,
+//  CHECK-SAME:     %[[ARG10:[a-zA-Z0-9_]+]]: i32)
 //   CHECK-DAG:       %[[IDX0:.+]] = linalg.index 0 : index
 //   CHECK-DAG:       %[[IDX1:.+]] = linalg.index 1 : index
 //   CHECK-DAG:       %[[IDX2:.+]] = linalg.index 2 : index
-- 
2.7.4