Entry blocks can never have predecessors, so printing the `// no predecessors` comment after an entry block's label is unnecessary.
Fixes #53287
Differential Revision: https://reviews.llvm.org/D117713
args_out = 1 : i64,
indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]} %arg1, %1 {
- ^bb0(%arg3: f32, %arg4: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32):
%4 = exp %arg3 : f32
linalg.yield %4 : f32
}: memref<2xf32>, memref<2xf32>
%1 = arith.cmpi slt, %i, %arg1 : index
scf.condition(%1) %i : index
} do {
- ^bb0(%i: index): // no predecessors
+ ^bb0(%i: index):
%1 = arith.addi %i, %c1 : index
%2 = arith.addi %arg2, %arg2 : i32
memref.store %2, %arg0[%i] : memref<?xi32>
/// Canonicalizes the pattern of the form
///
/// %tensor = tensor.generate %x {
-/// ^bb0(%arg0: index): // no predecessors
+/// ^bb0(%arg0: index):
/// <computation>
/// yield %1 : index
/// } : tensor<?xindex>
if (!block->getParent()) {
os << " // block is not in a region!";
} else if (block->hasNoPredecessors()) {
- os << " // no predecessors";
+ if (!block->isEntryBlock())
+ os << " // no predecessors";
} else if (auto *pred = block->getSinglePredecessor()) {
os << " // pred: ";
printBlockName(pred);
omp.parallel {
// CHECK: omp.wsloop (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
"omp.wsloop"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) ({
- ^bb0(%arg6: index, %arg7: index): // no predecessors
+ ^bb0(%arg6: index, %arg7: index):
// CHECK-DAG: %[[CAST_ARG6:.*]] = builtin.unrealized_conversion_cast %[[ARG6]] : i64 to index
// CHECK-DAG: %[[CAST_ARG7:.*]] = builtin.unrealized_conversion_cast %[[ARG7]] : i64 to index
// CHECK: "test.payload"(%[[CAST_ARG6]], %[[CAST_ARG7]]) : (index, index) -> ()
scf.condition(%0) %2, %3 : i64, f64
} do {
// CHECK: ^[[AFTER]](%[[ARG4:.*]]: i64, %[[ARG5:.*]]: f64):
- ^bb0(%arg2: i64, %arg3: f64): // no predecessors
+ ^bb0(%arg2: i64, %arg3: f64):
// CHECK: br ^[[BEFORE]](%{{.*}}, %{{.*}} : i32, f32)
scf.yield %c0_i32, %cst : i32, f32
}
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
// CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]}
// CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
- // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
// CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32
// CHECK: linalg.yield [[ADD]] : f32
// CHECK: } -> tensor<1x5x5x33xf32>
// CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<?x7x5x3xf32>, tensor<3x1x3x11xf32>) outs(%[[FILL]] : tensor<?x5x5x3x11xf32>)
// CHECK: %[[COLLAPSED:.+]] = "tosa.reshape"(%[[DEPTH]]) {new_shape = [-1, 5, 5, 33]}
// CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor<?x5x5x33xf32>) outs(%[[OUT]] : tensor<?x5x5x33xf32>) {
- // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
// CHECK: %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32
// CHECK: linalg.yield %[[ADD]] : f32
// CHECK: } -> tensor<?x5x5x33xf32>
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
// CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]}
// CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
- // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
// CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32
// CHECK: linalg.yield [[ADD]] : f32
// CHECK: } -> tensor<1x5x5x33xf32>
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>)
// CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 12, 12, 512]}
// CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) {
- // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors
+ // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32):
// CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32
// CHECK: linalg.yield [[ADD]] : i32
// CHECK: } -> tensor<1x12x12x512xi32>
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>)
// CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 10, 10, 512]}
// CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) {
- // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors
+ // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32):
// CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32
// CHECK: linalg.yield [[ADD]] : i32
// CHECK: } -> tensor<1x10x10x512xi32>
// CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index
// CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32
// CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] {
- // CHECK: ^bb0(%arg1: index, %arg2: index): // no predecessors
+ // CHECK: ^bb0(%arg1: index, %arg2: index):
// CHECK: linalg.yield [[CST]]
// CHECK: } : tensor<1x2xf32> to tensor<4x9xf32>
%1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xf32>, tensor<2x2xi32>) -> (tensor<4x9xf32>)
// CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index
// CHECK-DAG: [[CST:%.+]] = arith.constant 4.200000e+01 : f32
// CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] {
- // CHECK: ^bb0(%arg1: index, %arg2: index): // no predecessors
+ // CHECK: ^bb0(%arg1: index, %arg2: index):
// CHECK: linalg.yield [[CST]]
// CHECK: } : tensor<1x2xf32> to tensor<4x9xf32>
%1 = arith.constant dense<42.0> : tensor<f32>
// CHECK-NEXT: }
//
// GENERIC: "affine.for"() ({
- // GENERIC-NEXT: ^bb0(%{{.*}}: index): // no predecessors
+ // GENERIC-NEXT: ^bb0(%{{.*}}: index):
// GENERIC-NEXT: "affine.yield"() : () -> ()
// GENERIC-NEXT: }) {lower_bound = #map0, step = 1 : index, upper_bound = #map1} : () -> ()
affine.for %i = 0 to 10 {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
%out = linalg.pad_tensor %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] {
- ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index): // no predecessors
+ ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index):
linalg.yield %cst : f32
} : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32>
return %out : tensor<4x?x?x?xf32>
linalg.yield %3 : i32
} -> tensor<7x7xi32>
%3 = linalg.pad_tensor %arg2 low[%c0, %c0] high[%high, %high] {
- ^bb0(%arg9: index, %arg10: index): // no predecessors
+ ^bb0(%arg9: index, %arg10: index):
linalg.yield %cst : f32
} : tensor<?x?xf32> to tensor<2x4xf32>
return
%cst = arith.constant 0.000000e+00 : f32
%dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
%padded = linalg.pad_tensor %dynamic low[0, 0, 1, 1] high[0, 0, 1, 1] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
linalg.yield %cst: f32
} : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
return %padded: tensor<?x?x?x?xf32>
%cst = arith.constant 0.000000e+00 : f32
%dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
%padded = linalg.pad_tensor %dynamic low[0, %padding, 1, 1] high[0, %padding, 1, 1] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
linalg.yield %cst: f32
} : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
return %padded: tensor<?x?x?x?xf32>
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.cast %t : tensor<8x?xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %s] {
- ^bb0(%arg9: index, %arg10: index): // no predecessors
+ ^bb0(%arg9: index, %arg10: index):
linalg.yield %cst : f32
} : tensor<?x?xf32> to tensor<8x32xf32>
return %1 : tensor<8x32xf32>
%cst = arith.constant 0.0 : f32
%0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %c0] {
- ^bb0(%arg1: index, %arg2: index): // no predecessors
+ ^bb0(%arg1: index, %arg2: index):
linalg.yield %cst : f32
} : tensor<?x?xf32> to tensor<4x4xf32>
return %1 : tensor<4x4xf32>
%cst = arith.constant 0.0 : f32
%0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[0, 0] high[0, 1] {
- ^bb0(%arg1: index, %arg2: index): // no predecessors
+ ^bb0(%arg1: index, %arg2: index):
linalg.yield %cst : f32
} : tensor<?x?xf32> to tensor<4x4xf32>
return %1 : tensor<4x4xf32>
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<f32>
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<f32>
%4 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %1 : tensor<f32>, tensor<f32>)
outs(%3 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%5 = arith.mulf %arg3, %arg4 : f32
linalg.yield %5 : f32
} -> tensor<f32>
%7 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%1, %4 : tensor<f32>, tensor<f32>)
outs(%6 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%5 = arith.divf %arg3, %arg4 : f32
linalg.yield %5 : f32
} -> tensor<f32>
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.addf %arg3, %arg4 : f32
%3 = arith.mulf %2, %arg4 : f32
linalg.yield %3 : f32
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = "foreign.do_something"(%arg3, %arg4) {} : (f32, f32) -> f32
linalg.yield %2 : f32
} -> tensor<f32>
{indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
ins(%arg0_t : tensor<i1>)
outs(%2 : tensor<i8>) {
- ^bb0(%arg2: i1, %arg3: i8): // no predecessors
+ ^bb0(%arg2: i1, %arg3: i8):
%10 = arith.extui %arg2 : i1 to i8
linalg.yield %10 : i8
} -> tensor<i8>
{indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
ins(%arg1_t, %cst : tensor<i32>, tensor<i32>)
outs(%6 : tensor<i32>) {
- ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): // no predecessors
+ ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
%10 = arith.addi %arg2, %arg3 : i32
linalg.yield %10 : i32
} -> tensor<i32>
%4 = linalg.generic #attrs
ins(%2, %1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
%8 = linalg.generic #attrs
ins(%6, %6 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
%9 = arith.addi %arg0, %arg1 : i32
linalg.yield %9 : i32
} -> tensor<i32>
%4 = linalg.generic #attrs
ins(%2, %1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
%8 = linalg.generic #attrs
ins(%6, %6 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
%9 = arith.addi %arg0, %arg1 : i32
linalg.yield %9 : i32
} -> tensor<i32>
%4 = linalg.generic #attrs
ins(%2, %1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
%8 = linalg.generic #attrs
ins(%6, %12 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
%9 = arith.addi %arg0, %arg1 : i32
linalg.yield %9 : i32
} -> tensor<i32>
%2 = linalg.generic #attrs
ins(%0, %farg1 : tensor<i32>, tensor<i32>)
outs(%1 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
%6 = linalg.generic #attrs
ins(%4, %4 : tensor<i32>, tensor<i32>)
outs(%5 : tensor<i32>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
%8 = arith.addi %arg0, %arg1 : i32
linalg.yield %8 : i32
} -> tensor<i32>
%4 = linalg.generic #attrs
ins(%2, %farg1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
// DET-ALL: ^[[bb1]](%{{.*}}: tensor<10xi32>)
// DET-ALL: linalg.init_tensor [] : tensor<i32>
// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
-// DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32): // no predecessors
+// DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
// DET-ALL: %{{.*}} = arith.addi %{{.*}}, %{{.*}}
// DET-ALL: linalg.yield %{{.*}} : i32
// DET-ALL: } -> tensor<i32>
%4 = linalg.generic #attrs
ins(%2, %reshaped1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
%8 = linalg.generic #attrs
ins(%6, %6 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): // no predecessors
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
%9 = arith.addi %arg0, %arg1 : i32
linalg.yield %9 : i32
} -> tensor<i32>
%0 = linalg.generic #trait
ins(%arg0 : tensor<1x5xf32>)
outs(%shape : tensor<5xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
} -> tensor<5xf32>
return %0 : tensor<5xf32>
%2 = linalg.generic {i64, indexing_maps = [#map1, #map0],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%arg0 : tensor<5xf32>) outs(%1 : tensor<1x2x5xf32>) {
- ^bb0(%arg1: f32, %arg2: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
} -> tensor<1x2x5xf32>
%3 = tensor.collapse_shape %2 [[0, 1], [2]]
iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
ins(%arg0 : tensor<1x?x1x?xf32>)
outs(%2 : tensor<1x?xf32>) {
- ^bb0(%arg1: f32, %arg2: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32):
%4 = arith.addf %arg1, %arg2 : f32
linalg.yield %4 : f32
} -> tensor<1x?xf32>
iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
ins(%arg0 : tensor<1x?x1x1xf32>)
outs(%2 : tensor<1x1xf32>) {
- ^bb0(%arg1: f32, %arg2: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32):
%4 = arith.addf %arg1, %arg2 : f32
linalg.yield %4 : f32
} -> tensor<1x1xf32>
iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
ins(%arg0 : tensor<?x1x?x1xf32>)
outs(%2 : tensor<?x1xf32>) {
- ^bb0(%arg1: f32, %arg2: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32):
%4 = arith.addf %arg1, %arg2 : f32
linalg.yield %4 : f32
} -> tensor<?x1xf32>
linalg.generic #trait
ins(%arg0 : memref<1x5xf32>)
outs(%shape : memref<5xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
}
return %shape : memref<5xf32>
linalg.generic {i64, indexing_maps = [#map1, #map0],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%arg0 : memref<5xf32>) outs(%1 : memref<1x2x5xf32>) {
- ^bb0(%arg1: f32, %arg2: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
}
%3 = memref.collapse_shape %1 [[0, 1], [2]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]}
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : memref<?x1x?xf32, #[[MAP0]]>, f32)
// CHECK-SAME: outs(%[[OUT]] : memref<?x?x?xf32>) {
-// CHECK: ^bb0(%{{.*}}: f32, %[[ARG:.*]]: f32, %{{.*}}: f32): // no predecessors
+// CHECK: ^bb0(%{{.*}}: f32, %[[ARG:.*]]: f32, %{{.*}}: f32):
// CHECK: linalg.yield %[[ARG]] : f32
// CHECK: }
// CHECK: return %[[ARG2]] : memref<?x1x?x1x?xf32>
%0 = linalg.generic #trait
ins(%arg0 : tensor<1x5xf32>)
outs(%shape : tensor<5xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
} -> tensor<5xf32>
return %0 : tensor<5xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%4 = arith.addf %arg3, %arg4 : f32
linalg.yield %4 : f32
} -> tensor<?x?xf32>
// CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]]
// CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]]
// CHECK-SAME: [[ARG2:%[a-zA-Z0-9_]*]]
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): // no predecessors
+ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
// CHECK: [[T1:%[a-zA-Z0-9_]*]] = arith.addf [[ARG0]], [[ARG1]]
// CHECK-NOT: linalg.yield
// CHECK: arith.mulf [[T1]], [[ARG2]]
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, f32)
outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%4 = arith.addf %arg3, %arg4 : f32
linalg.yield %4 : f32
} -> tensor<?x?xf32>
// CHECK-SAME: [[ARG3:%[a-zA-Z0-9_]*]]
// CHECK-SAME: [[ARG4:%[a-zA-Z0-9_]*]]
// CHECK-SAME: [[ARG5:%[a-zA-Z0-9_]*]]
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): // no predecessors
+ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
// CHECK: [[T1:%[a-zA-Z0-9_]*]] = arith.addf [[ARG3]], [[ARG4]]
// CHECK-NOT: linalg.yield
// CHECK: arith.mulf [[T1]], [[ARG5]]
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%4 = arith.addf %arg3, %arg4 : f32
linalg.yield %4 : f32
} -> tensor<?x?xf32>
%4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%3, %arg2 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): // no predecessors
+ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
%5 = arith.mulf %arg5, %arg6 : f32
linalg.yield %5 : f32
} -> tensor<?x?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%4 = arith.addf %arg3, %arg4 : f32
linalg.yield %4 : f32
} -> tensor<?x?xf32>
%4 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%3, %arg2 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>){
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): // no predecessors
+ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
%5 = arith.mulf %arg5, %arg6 : f32
linalg.yield %5 : f32
} -> tensor<?x?xf32>
%2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]}
ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
outs(%1 : tensor<?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%3 = arith.addf %arg3, %arg4 : f32
linalg.yield %3 : f32
} -> tensor<?xf32>
%5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%2, %arg2 : tensor<?xf32>, tensor<?x?xf32>)
outs(%4 : tensor<?x?xf32>){
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): // no predecessors
+ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
%6 = arith.mulf %arg5, %arg6 : f32
linalg.yield %6 : f32
} -> tensor<?x?xf32>
%1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []}
ins(%arg0, %arg1 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<f32>
%2 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []}
ins(%1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%3 = arith.mulf %arg3, %arg4 : f32
linalg.yield %3 : f32
} -> tensor<f32>
iterator_types = ["parallel", "parallel"] }
ins(%arg0, %arg1 : tensor<?x?xi32>, tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): // no predecessors
+ ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
%10 = arith.addi %arg2, %arg3 : i32
linalg.yield %10 : i32
} -> tensor<?x?xi32>
iterator_types = ["parallel", "parallel"] }
ins(%3 : tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg2: i32, %arg3: i32): // no predecessors
+ ^bb0(%arg2: i32, %arg3: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%5 = arith.index_cast %idx0 : index to i32
iterator_types = ["parallel", "parallel"] }
ins(%arg0 : tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg4: i32, %arg5: i32): // no predecessors
+ ^bb0(%arg4: i32, %arg5: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%4 = arith.index_cast %idx0 : index to i32
iterator_types = ["parallel", "parallel"] }
ins(%3, %arg0 : tensor<?x?xi32>, tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): // no predecessors
+ ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
%10 = arith.addi %arg2, %arg3 : i32
linalg.yield %10 : i32
} -> tensor<?x?xi32>
iterator_types = ["parallel", "parallel"] }
ins(%arg0 : tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg2: i32, %arg3: i32): // no predecessors
+ ^bb0(%arg2: i32, %arg3: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%4 = arith.index_cast %idx0 : index to i32
iterator_types = ["parallel", "parallel"] }
ins(%3 : tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg2: i32, %arg3: i32): // no predecessors
+ ^bb0(%arg2: i32, %arg3: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%5 = arith.index_cast %idx0 : index to i32
{indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>],
iterator_types = []}
ins(%arg1 : tensor<i32>) outs(%0 : tensor<f32>) {
- ^bb0(%arg2: i32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: i32, %arg3: f32):
%3 = arith.index_cast %arg2 : i32 to index
%4 = tensor.extract %arg0[%3, %c0, %c0] : tensor<5x1x1xf32>
linalg.yield %4 : f32
affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%1, %cst : tensor<f32>, tensor<10xf32>) outs(%2 : tensor<10xf32>) {
- ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
%4 = arith.mulf %arg2, %arg3 : f32
linalg.yield %4 : f32
} -> tensor<10xf32>
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<1x10xf32>, tensor<1x10xf32>)
outs(%init : tensor<1x10xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<1x10xf32>
iterator_types = ["reduction"]}
ins(%0 : tensor<1x10xf32>)
outs(%arg2 : tensor<1xf32>) {
- ^bb0(%arg3: f32, %arg4: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32):
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<1xf32>
iterator_types = ["parallel", "parallel"]
}
outs(%init0 : tensor<?x1xf32>) {
- ^bb0(%a: f32): // no predecessors
+ ^bb0(%a: f32):
linalg.yield %cp5 : f32
} -> tensor<?x1xf32>
%d0 = tensor.dim %0, %c0 : tensor<?x1xf32>
}
ins(%0, %1 : tensor<?x1xf32>, tensor<?x1xf32>)
outs(%init1 : tensor<?x1xf32>) {
- ^bb0(%a: f32, %b: f32, %c: f32): // no predecessors
+ ^bb0(%a: f32, %b: f32, %c: f32):
%m = arith.mulf %a, %b : f32
linalg.yield %m : f32
} -> tensor<?x1xf32>
indexing_maps = [#map0, #map1],
iterator_types = ["parallel", "parallel"]
} ins(%arg0 : tensor<f32>) outs(%4 : tensor<?x?xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
} -> tensor<?x?xf32>
%6 = linalg.init_tensor [%arg1] : tensor<?xf32>
indexing_maps = [#map2, #map3],
iterator_types = ["parallel", "reduction"]
} ins(%5 : tensor<?x?xf32>) outs(%7 : tensor<?xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
%9 = arith.maxf %arg2, %arg3 : f32
linalg.yield %9 : f32
} -> tensor<?xf32>
linalg.generic #pointwise_2d_trait
ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
outs(%C : memref<?x?xf32>) {
- ^bb0(%e: f32, %arg5: f32, %arg6: f32): // no predecessors
+ ^bb0(%e: f32, %arg5: f32, %arg6: f32):
%2 = arith.addf %e, %arg5 : f32
linalg.yield %2 : f32
}
indexing_maps = [affine_map<(i, j) -> (j, i)>],
iterator_types = ["parallel", "parallel"]}
outs(%A : memref<?x?xindex>) {
- ^bb0(%a: index): // no predecessors
+ ^bb0(%a: index):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%0 = arith.addi %idx0, %idx1 : index
indexing_maps = [affine_map<(i, j) -> (i, j)>],
iterator_types = ["parallel", "parallel"]}
outs(%A : memref<?x?xindex>) {
- ^bb0(%a: index): // no predecessors
+ ^bb0(%a: index):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%0 = arith.addi %idx0, %idx1 : index
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %B : tensor<?x112x16xf32>, tensor<16xf32>)
outs(%init : tensor<?x112x16xf32>) {
- ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
%s = arith.subf %arg1, %arg2 : f32
linalg.yield %s : f32
} -> tensor<?x112x16xf32>
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %1, %C : tensor<112x112x16xf32>, tensor<112x112x16xf32>, tensor<16xf32>)
outs(%2 : tensor<112x112x16xf32>) {
- ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
%s = arith.subf %arg1, %arg2 : f32
%m = arith.mulf %s, %arg3 : f32
linalg.yield %m : f32
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%20, %B : tensor<112x112x16xf32>, tensor<112xf32>)
outs(%21 : tensor<112x112x16xf32>) {
- ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
%s = arith.subf %arg1, %arg2 : f32
linalg.yield %s : f32
} -> tensor<112x112x16xf32>
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%25, %arg1, %arg2 : tensor<2x3x5xi32>, tensor<5xf32>, tensor<5xf32>)
outs(%26 : tensor<2x3x5xf32>) {
- ^bb0(%arg6: i32, %arg7: f32, %arg8: f32, %arg9: f32): // no predecessors
+ ^bb0(%arg6: i32, %arg7: f32, %arg8: f32, %arg9: f32):
%29 = arith.sitofp %arg6 : i32 to f32
%30 = arith.addf %arg7, %cst_8 : f32
%31 = arith.divf %cst_7, %30 : f32
ins(%A, %A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
memref<?x?xf32, offset: 0, strides: [?, ?]>)
outs(%B : memref<?x?xf32, offset: 0, strides: [?, ?]>) {
- ^bb0(%E: f32, %arg5: f32, %arg6: f32): // no predecessors
+ ^bb0(%E: f32, %arg5: f32, %arg6: f32):
%2 = arith.addf %E, %arg5 : f32
linalg.yield %2 : f32
}
ins(%4, %5: memref<?x?xf32, offset: ?, strides: [?, ?]>,
memref<?x?xf32, offset: ?, strides: [?, ?]>)
outs(%6 : memref<?x?xf32, offset: ?, strides: [?, ?]>) {
- ^bb0(%arg6: f32, %arg7: f32, %arg8: f32): // no predecessors
+ ^bb0(%arg6: f32, %arg7: f32, %arg8: f32):
%7 = arith.mulf %arg6, %arg7 : f32
linalg.yield %7 : f32
}
linalg.generic #pointwise_2d_trait
ins(%A, %A : memref<?x?xf32>, memref<?x?xf32>)
outs(%B : memref<?x?xf32>) {
- ^bb0(%e: f32, %arg5: f32, %arg6: f32): // no predecessors
+ ^bb0(%e: f32, %arg5: f32, %arg6: f32):
%2 = arith.addf %e, %arg5 : f32
linalg.yield %2 : f32
}
ins(%4, %5: memref<?x?xf32, offset: ?, strides: [?, ?]>,
memref<?x?xf32, offset: ?, strides: [?, ?]>)
outs(%6 : memref<?x?xf32, offset: ?, strides: [?, ?]>) {
- ^bb0(%arg6: f32, %arg7: f32, %arg8: f32): // no predecessors
+ ^bb0(%arg6: f32, %arg7: f32, %arg8: f32):
%7 = arith.mulf %arg6, %arg7 : f32
linalg.yield %7 : f32
}
iterator_types = ["parallel", "parallel"]}
ins(%arg1 : memref<100xf32>)
outs(%0 : memref<100x10xf32>) {
- ^bb0(%arg3: f32, %arg4: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32):
linalg.yield %arg3 : f32
}
%1 = memref.alloc() {temp = true} : memref<100x10xf32>
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %0: memref<100x10xf32>, memref<100x10xf32>)
outs(%1 : memref<100x10xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.subf %arg3, %arg4 : f32
linalg.yield %2 : f32
}
iterator_types = ["parallel", "parallel"]}
ins(%6 : memref<?x?xf32, #map2>)
outs(%7 : memref<?x?xf32, #map2>) {
- ^bb0(%arg3: f32, %arg4: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32):
%8 = math.exp %arg3 : f32
linalg.yield %8 : f32
}
func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
linalg.yield %cst : f32
} : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>
return %0 : tensor<1x32x32x1xf32>
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
%out = linalg.pad_tensor %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] {
- ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index): // no predecessors
+ ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index):
linalg.yield %cst : f32
} : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32>
return %out : tensor<4x?x?x?xf32>
// MATVEC-DAG: %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]]
%2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
%3 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] {
- ^bb0(%arg5: index): // no predecessors
+ ^bb0(%arg5: index):
linalg.yield %cst : f32
} : tensor<4xf32> to tensor<4xf32>
%3 = tensor.extract_slice %arg1[%arg3] [%1] [1] : tensor<12xf32> to tensor<?xf32>
%4 = affine.apply #map1(%1)
%5 = linalg.pad_tensor %2 low[%c0, %c0] high[%c0, %4] {
- ^bb0(%arg5: index, %arg6: index): // no predecessors
+ ^bb0(%arg5: index, %arg6: index):
linalg.yield %cst : f32
} : tensor<24x?xf32> to tensor<24x5xf32>
%6 = linalg.pad_tensor %3 low[%c0] high[%4] {
- ^bb0(%arg5: index): // no predecessors
+ ^bb0(%arg5: index):
linalg.yield %cst : f32
} : tensor<?xf32> to tensor<5xf32>
%4 = tensor.extract_slice %arg1[%arg3] [%2] [1] : tensor<?xf32> to tensor<?xf32>
%5 = affine.apply #map1(%2)
%6 = linalg.pad_tensor %3 low[%c0, %c0] high[%c0, %5] {
- ^bb0(%arg5: index, %arg6: index): // no predecessors
+ ^bb0(%arg5: index, %arg6: index):
linalg.yield %cst : f32
} : tensor<24x?xf32> to tensor<24x4xf32>
%7 = linalg.pad_tensor %4 nofold low[%c0] high[%5] {
- ^bb0(%arg5: index): // no predecessors
+ ^bb0(%arg5: index):
linalg.yield %cst : f32
} : tensor<?xf32> to tensor<4xf32>
// MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]
%2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
%3 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] {
- ^bb0(%arg5: index): // no predecessors
+ ^bb0(%arg5: index):
%5 = arith.index_cast %arg3 : index to i32
%6 = arith.sitofp %5 : i32 to f32
linalg.yield %6 : f32
%2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
%3 = tensor.extract %arg1[%arg3] : tensor<12xf32>
%4 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] {
- ^bb0(%arg5: index): // no predecessors
+ ^bb0(%arg5: index):
linalg.yield %3 : f32
} : tensor<4xf32> to tensor<4xf32>
%2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
%3 = arith.index_cast %arg3 : i32 to index
%4 = linalg.pad_tensor %2 nofold low[%3] high[%3] {
- ^bb0(%arg6: index): // no predecessors
+ ^bb0(%arg6: index):
linalg.yield %cst : f32
} : tensor<4xf32> to tensor<4xf32>
%2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
%3 = memref.load %arg3[%c0] : memref<?xindex>
%4 = linalg.pad_tensor %2 nofold low[%3] high[%3] {
- ^bb0(%arg6: index): // no predecessors
+ ^bb0(%arg6: index):
linalg.yield %cst : f32
} : tensor<4xf32> to tensor<4xf32>
scf.yield %6 : index
}
%4 = linalg.pad_tensor %2 nofold low[%3] high[%3] {
- ^bb0(%arg6: index): // no predecessors
+ ^bb0(%arg6: index):
linalg.yield %cst : f32
} : tensor<4xf32> to tensor<4xf32>
// Check the fused and padded fill op does not prevent hoisting.
%4 = linalg.pad_tensor %2 nofold low[%c0, %c0] high[%3, %c0] {
- ^bb0(%arg5: index, %arg6: index): // no predecessors
+ ^bb0(%arg5: index, %arg6: index):
linalg.yield %cst : f32
} : tensor<?x24xf32> to tensor<5x24xf32>
%5 = linalg.fill(%cst, %4) : f32, tensor<5x24xf32> -> tensor<5x24xf32>
%10 = tensor.extract_slice %arg1[%arg5, 0] [3, 24] [1, 1] : tensor<6x24xf32> to tensor<3x24xf32>
%11 = tensor.extract_slice %arg6[0, 0] [%1, 24] [1, 1] : tensor<?x24xf32> to tensor<?x24xf32>
%12 = linalg.pad_tensor %9 nofold low[%c0, %c0] high[%3, %c0] {
- ^bb0(%arg7: index, %arg8: index): // no predecessors
+ ^bb0(%arg7: index, %arg8: index):
linalg.yield %cst : f32
} : tensor<?x3xf32> to tensor<5x3xf32>
%13 = linalg.pad_tensor %10 nofold low[%c0, %c0] high[%c0, %c0] {
- ^bb0(%arg7: index, %arg8: index): // no predecessors
+ ^bb0(%arg7: index, %arg8: index):
linalg.yield %cst : f32
} : tensor<3x24xf32> to tensor<3x24xf32>
// Check the output padding is not hoisted.
// MATMUL: %[[T8:.*]] = linalg.pad_tensor
%14 = linalg.pad_tensor %11 nofold low[%c0, %c0] high[%3, %c0] {
- ^bb0(%arg7: index, %arg8: index): // no predecessors
+ ^bb0(%arg7: index, %arg8: index):
linalg.yield %cst : f32
} : tensor<?x24xf32> to tensor<5x24xf32>
ins(%arg0, %scalar : tensor<4xf32>, tensor<f32>)
outs(%0 : tensor<4xf32>) {
// CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32)
- ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
// CHECK: tensor.extract %[[SCALAR]][]
%2 = arith.divf %arg1, %arg2 : f32
linalg.yield %2 : f32
ins(%arg0, %scalar : tensor<4xf32>, tensor<1xf32>)
outs(%0 : tensor<4xf32>) {
// CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32)
- ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
// CHECK: tensor.extract %[[SCALAR]][%[[ZERO]]]
%2 = arith.divf %arg1, %arg2 : f32
linalg.yield %2 : f32
func @pad_result_type(%arg0: tensor<?x2x3x4xi32>, %arg1: index, %arg2: i32) -> tensor<?x?x?x8xf32> {
// expected-error @+1 {{specified type 'tensor<?x?x?x8xf32>' does not match the inferred type 'tensor<?x?x?x9xi32>}}
%0 = linalg.pad_tensor %arg0 low[1, %arg1, 2, 2] high[1, 2, %arg1, 3] {
- ^bb0(%arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg3: index, %arg4: index):
linalg.yield %arg2 : i32
} : tensor<?x2x3x4xi32> to tensor<?x?x?x8xf32>
return %0 : tensor<?x?x?x8xf32>
func @pad_number_of_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
// expected-error @+1 {{expected the block to have 2 arguments}}
%0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
- ^bb0(%arg2: index, %arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg2: index, %arg3: index, %arg4: index):
linalg.yield %arg1 : i32
} : tensor<?x4xi32> to tensor<?x9xi32>
return %0 : tensor<?x9xi32>
func @pad_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
// expected-error @+1 {{op expected block argument 1 to be an index}}
%0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
- ^bb0(%arg2: i32, %arg3: i32): // no predecessors
+ ^bb0(%arg2: i32, %arg3: i32):
linalg.yield %arg1 : i32
} : tensor<?x4xi32> to tensor<?x9xi32>
return %0 : tensor<?x9xi32>
func @pad_num_yields(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
// expected-error @+3 {{op expected single yield operand (got 2)}}
%0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
- ^bb0(%arg2: index, %arg3: index): // no predecessors
+ ^bb0(%arg2: index, %arg3: index):
linalg.yield %arg1, %arg1 : i32, i32
} : tensor<?x4xi32> to tensor<?x9xi32>
return %0 : tensor<?x9xi32>
func @pad_yield_type(%arg0: tensor<?x4xi32>, %arg1: i8) -> tensor<?x9xi32> {
// expected-error @+3 {{op expected yield type to match shape element type}}
%0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
- ^bb0(%arg2: index, %arg3: index): // no predecessors
+ ^bb0(%arg2: index, %arg3: index):
linalg.yield %arg1 : i8
} : tensor<?x4xi32> to tensor<?x9xi32>
return %0 : tensor<?x9xi32>
func @generic_const_init(%arg0: memref<?xf32>) {
%cst = arith.constant 1.0 : f32
linalg.generic #trait_const_fill outs(%arg0 : memref<?xf32>) {
- ^bb0(%arg1: f32): // no predecessors
+ ^bb0(%arg1: f32):
linalg.yield %cst : f32
}
return
%cst = arith.constant 0.000000e+00 : f32
%0 = bufferization.to_tensor %arg0 : memref<1x28x28x1xf32>
%1 = linalg.pad_tensor %0 low[1, 1, 1, 2] high[0, 2, 2, 0] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
linalg.yield %cst : f32
} : tensor<1x28x28x1xf32> to tensor<2x31x31x3xf32>
%2 = bufferization.to_memref %1 : memref<2x31x31x3xf32>
func @pad_tensor_no_memrefs(%arg0: tensor<1x28x28xf32>) -> tensor<2x32x32xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.pad_tensor %arg0 low[1, 2, 2] high[0, 2, 2] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index): // no predecessors
+ ^bb0(%arg1: index, %arg2: index, %arg3: index):
linalg.yield %cst : f32
} : tensor<1x28x28xf32> to tensor<2x32x32xf32>
return %0 : tensor<2x32x32xf32>
func @pad_tensor_detailed(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
linalg.yield %cst : f32
} : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>
return %0 : tensor<1x32x32x1xf32>
%arg0 : tensor<?x?x?x?xf32>, %arg2 : tensor<?x?x?x?xf32>, %arg1 : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
// expected-error @+1 {{unexpected input index map for convolutions}}
%0 = "linalg.conv_2d_nhwc_hwcf"(%arg0, %arg1, %arg2) ({
- ^bb0(%arg3: f32, %arg4: f32, %arg5 : f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5 : f32):
%1 = "arith.mulf"(%arg3, %arg4) : (f32, f32) -> f32
%2 = "arith.addf"(%arg5, %1) : (f32, f32) -> f32
"linalg.yield"(%2) : (f32) -> ()
%arg0 : tensor<?x?x?x?xf32>, %arg1 : tensor<?x?x?x?x?xf32>, %arg2 : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
// expected-error @+1 {{expected output/filter indexing maps to be projected permutations}}
%0 = "linalg.conv_2d_nhwc_hwcf"(%arg0, %arg1, %arg2) ({
- ^bb0(%arg3: f32, %arg4: f32, %arg5 : f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5 : f32):
%1 = "arith.mulf"(%arg3, %arg4) : (f32, f32) -> f32
%2 = "arith.addf"(%arg5, %1) : (f32, f32) -> f32
"linalg.yield"(%2) : (f32) -> ()
// MATMUL: %[[T3:.*]] = linalg.fill(%{{.*}}, %[[T2]]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
- ^bb0(%arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg3: index, %arg4: index):
linalg.yield %cst : f32
} : tensor<?x?xf32> to tensor<64x64xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
- ^bb0(%arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg3: index, %arg4: index):
linalg.yield %cst : f32
} : tensor<?x?xf32> to tensor<64x64xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%iv0, %iv0] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
- ^bb0(%arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg3: index, %arg4: index):
linalg.yield %cst : f32
} : tensor<?x?xf32> to tensor<64x64xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0, 0] [%size, %size, 1] [1, 1, 1] : tensor<64x64x1xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
- ^bb0(%arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg3: index, %arg4: index):
linalg.yield %cst : f32
} : tensor<?x?xf32> to tensor<64x64xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
- ^bb0(%arg3: index, %arg4: index): // no predecessors
+ ^bb0(%arg3: index, %arg4: index):
linalg.yield %cst : f32
} : tensor<?x?xf32> to tensor<62x62xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<62x62xf32> -> tensor<62x62xf32>
iterator_types = ["parallel", "parallel"]}
ins(%lhs, %rhs : memref<2x2xf32>, memref<2x2xf32>)
outs(%sum : memref<2x2xf32>) {
- ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32): // no predecessors
+ ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):
%0 = arith.addf %lhs_in, %rhs_in : f32
linalg.yield %0 : f32
}
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %arg1, %arg2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>, f32)
outs(%0 : tensor<?x?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
%2 = arith.addf %1, %arg5 : f32
linalg.yield %2 : f32
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1, %arg2 : tensor<?x?xf32>, tensor<?x?xf32>, f32)
outs(%arg0 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
%2 = arith.addf %1, %arg5 : f32
linalg.yield %2 : f32
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %cst : tensor<264x4xf32>, tensor<264x4xf32>)
outs(%0 : tensor<264x4xf32>) {
- ^bb0(%arg1: f32, %arg2: f32, %s: f32): // no predecessors
+ ^bb0(%arg1: f32, %arg2: f32, %s: f32):
%2 = arith.mulf %arg1, %arg2 : f32
linalg.yield %2 : f32
} -> tensor<264x4xf32>
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xi32>, tensor<?x?xi32>)
outs(%arg0 : tensor<?x?xi32>) {
- ^bb0(%arg3: i32, %arg4: i32, %s: i32): // no predecessors
+ ^bb0(%arg3: i32, %arg4: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%1 = arith.muli %arg3, %arg4 : i32
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0 : tensor<264x?xi32>)
outs(%shape : tensor<264x?x4xi32>) {
- ^bb0(%arg1: i32, %s: i32): // no predecessors
+ ^bb0(%arg1: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%idx2 = linalg.index 2 : index
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg0 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %s: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
linalg.yield %1 : f32
} -> tensor<?x?xf32>
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%0 : tensor<5xf32>) outs(%1 : tensor<5x5xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
} -> tensor<5x5xf32>
return %2 : tensor<5x5xf32>
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%arg0 : tensor<5xf32>) outs(%0 : tensor<5x5xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
} -> tensor<5x5xf32>
%2 = tensor.expand_shape %1 [[0, 1], [2]]
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %arg1 : tensor<?x2x4xf32>, tensor<?x2x4xf32>)
outs(%2 : tensor<?x2x4xf32>) {
- ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
%4 = arith.mulf %arg2, %arg3 : f32
linalg.yield %4 : f32
} -> tensor<?x2x4xf32>
iterator_types = ["parallel"]}
ins(%0, %arg1 : tensor<1xi64>, tensor<?xi64>)
outs(%1 : tensor<1xi64>) {
- ^bb0(%arg4: i64, %arg5: i64, %arg6: i64): // no predecessors
+ ^bb0(%arg4: i64, %arg5: i64, %arg6: i64):
%3 = arith.addi %arg4, %arg5 : i64
linalg.yield %3 : i64
} -> tensor<1xi64>
iterator_types = ["parallel", "parallel", "parallel", "parallel"] }
ins(%0 : tensor<?x?x4x?xi32>)
outs(%0 : tensor<?x?x4x?xi32>) {
- ^bb0(%arg6: i32, %arg7 : i32): // no predecessors
+ ^bb0(%arg6: i32, %arg7 : i32):
%idx = linalg.index 0 : index
%2 = arith.index_cast %idx : index to i32
%3 = arith.addi %arg6, %2 : i32
indexing_maps = [#map0, #map0],
iterator_types = ["parallel", "parallel", "parallel", "parallel"] }
ins(%arg0 : tensor<?x?x4x5xi32>) outs(%arg0 : tensor<?x?x4x5xi32>) {
- ^bb0(%arg6: i32, %arg7: i32): // no predecessors
+ ^bb0(%arg6: i32, %arg7: i32):
%idx = linalg.index 0 : index
%2 = arith.index_cast %idx : index to i32
%3 = arith.addi %arg6, %2 : i32
{indexing_maps = [#map2, #map3],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0 : tensor<3x5x7xf32>) outs(%1 : tensor<3x7x5xf32>) {
- ^bb0(%arg2: f32, %arg3 : f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3 : f32):
linalg.yield %arg2 : f32
} -> tensor<3x7x5xf32>
return %2 : tensor<3x7x5xf32>
{indexing_maps = [#map2, #map3],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0 : tensor<3x5x7xf32>) outs(%1 : tensor<5x7x3xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
} -> tensor<5x7x3xf32>
return %2 : tensor<5x7x3xf32>
{indexing_maps = [#map2, #map3],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0 : tensor<3x5x7xf32>) outs(%1 : tensor<5x3x7xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
} -> tensor<5x3x7xf32>
return %2 : tensor<5x3x7xf32>
{indexing_maps = [#map0, #map1],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%arg0 : tensor<3x5x7xf32>) outs(%0 : tensor<5x3x7xf32>) {
- ^bb0(%arg2: f32, %arg3 : f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3 : f32):
linalg.yield %arg2 : f32
} -> tensor<5x3x7xf32>
%2 = tensor.collapse_shape %1 [[0], [1, 2]]
iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?x?x5xf32>, tensor<?x?x?x5xf32>)
outs(%arg0 : tensor<?x?x?x5xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%1 = arith.mulf %arg3, %arg4 : f32
linalg.yield %1 : f32
} -> tensor<?x?x?x5xf32>
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%arg0 : tensor<6x1xf32>) outs(%0 : tensor<6x1xi32>) {
- ^bb0(%arg3: f32, %arg4: i32): // no predecessors
+ ^bb0(%arg3: f32, %arg4: i32):
%5 = arith.fptosi %arg3 : f32 to i32
linalg.yield %5 : i32
} -> tensor<6x1xi32>
linalg.generic {indexing_maps = [#map0],
iterator_types = ["parallel", "parallel", "parallel"]}
outs(%arg0 : memref<?x?x?xf32>) {
- ^bb0(%arg3: f32): // no predecessors
+ ^bb0(%arg3: f32):
%cst = arith.constant 0.000000e+00 : f32
linalg.yield %cst : f32
}
iterator_types = ["reduction"]}
ins(%arg0, %arg1 : tensor<?xi32>, tensor<?xi32>)
outs(%1, %3 : tensor<i32>, tensor<i32>) {
- ^bb0(%arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32): // no predecessors
+ ^bb0(%arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32):
%5 = arith.cmpi sge, %arg3, %arg5 : i32
%6 = select %5, %arg3, %arg5 : i32
%7 = arith.cmpi eq, %arg3, %arg5 : i32
%c24 = arith.constant 24 : index
%c4 = arith.constant 4 : index
%0 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "reduction", "parallel"]} ins(%arg3 : tensor<12x7x25xf32>) outs(%arg1 : tensor<12x25xf32>) {
- ^bb0(%arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg4: f32, %arg5: f32):
%2 = arith.addf %arg4, %arg5 : f32
linalg.yield %2 : f32
} -> tensor<12x25xf32>
%c24 = arith.constant 24 : index
%c4 = arith.constant 4 : index
%0 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} ins(%arg3 : tensor<12x24xf32>) outs(%arg0 : tensor<24x12xf32>) {
- ^bb0(%arg4: f32, %arg5: f32): // no predecessors
+ ^bb0(%arg4: f32, %arg5: f32):
%2 = arith.addf %arg4, %arg5 : f32
linalg.yield %2 : f32
} -> tensor<24x12xf32>
%c24 = arith.constant 24 : index
%c4 = arith.constant 4 : index
%0 = linalg.generic {indexing_maps = [#map0], iterator_types = ["parallel", "parallel"]} outs(%arg1 : tensor<12x25xi32>) {
- ^bb0(%arg3: i32): // no predecessors
+ ^bb0(%arg3: i32):
%6 = linalg.index 0 : index
%7 = linalg.index 1 : index
%8 = arith.addi %6, %7 : index
// GENERIC-SAME: %[[IV1]]
// GENERIC: linalg.generic {{.*}} ins(%[[T2]] {{.*}} outs(%[[T3]]
%2 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "reduction"]} ins(%0 : tensor<10x17xf32>) outs(%1 : tensor<10xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
%3 = arith.addf %arg2, %arg3 : f32
linalg.yield %3 : f32
} -> tensor<10xf32>
// GENERIC-SAME: , %[[UB1]]
// GENERIC: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
%1 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} ins(%0 : tensor<10x17xf32>) outs(%arg1 : tensor<10x8xf32>) {
- ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ ^bb0(%arg2: f32, %arg3: f32):
%2 = arith.addf %arg2, %arg3 : f32
linalg.yield %2 : f32
} -> tensor<10x8xf32>
linalg.generic #pointwise_2d_trait
ins(%arg0, %arg1 : memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>)
outs(%arg2 : memref<?x?xf32, offset: ?, strides: [?, 1]>) {
- ^bb0(%arg4: f32, %arg5: f32, %arg6: f32): // no predecessors
+ ^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
%4 = arith.addf %arg4, %arg5 : f32
linalg.yield %4 : f32
}
iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
ins(%B, %A, %A, %B: memref<4x4xf32>, memref<4xf32>, memref<4xf32>, memref<4x4xf32>)
outs(%C : memref<4x4x4x4xf32>) {
- ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
+ ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
%s = arith.subf %arg0, %arg1 : f32
%a = arith.addf %arg2, %s : f32
%b = arith.addf %arg3, %a : f32
],
iterator_types = ["parallel", "parallel", "reduction"]
} ins(%input : tensor<4x16x8xf32>) outs(%output : tensor<4x16xf32>) {
- ^bb0(%arg0: f32, %arg1: f32): // no predecessors
+ ^bb0(%arg0: f32, %arg1: f32):
%1 = math.exp %arg0 : f32
%2 = arith.addf %1, %arg1 : f32
linalg.yield %2 : f32
],
iterator_types = ["parallel", "reduction", "reduction", "parallel"]
} ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) outs(%output : tensor<5x2xf32>) {
- ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): // no predecessors
+ ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
%1 = math.exp %arg0 : f32
%2 = math.exp %arg1 : f32
%3 = arith.addf %1, %2 : f32
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
- ^bb0(%in0: f32, %out0: f32): // no predecessors
+ ^bb0(%in0: f32, %out0: f32):
%max = arith.maxf %in0, %out0 : f32
linalg.yield %max : f32
} -> tensor<4xf32>
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
- ^bb0(%in0: f32, %out0: f32): // no predecessors
+ ^bb0(%in0: f32, %out0: f32):
%min = arith.minf %out0, %in0 : f32
linalg.yield %min : f32
} -> tensor<4xf32>
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
- ^bb0(%in0: f32, %out0: f32): // no predecessors
+ ^bb0(%in0: f32, %out0: f32):
%mul = arith.mulf %in0, %out0 : f32
linalg.yield %mul : f32
} -> tensor<4xf32>
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
- ^bb0(%in0: i1, %out0: i1): // no predecessors
+ ^bb0(%in0: i1, %out0: i1):
%or = arith.ori %in0, %out0 : i1
linalg.yield %or : i1
} -> tensor<4xi1>
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
- ^bb0(%in0: i1, %out0: i1): // no predecessors
+ ^bb0(%in0: i1, %out0: i1):
%and = arith.andi %in0, %out0 : i1
linalg.yield %and : i1
} -> tensor<4xi1>
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
- ^bb0(%in0: i1, %out0: i1): // no predecessors
+ ^bb0(%in0: i1, %out0: i1):
%xor = arith.xori %in0, %out0 : i1
linalg.yield %xor : i1
} -> tensor<4xi1>
iterator_types = ["reduction"]}
ins(%arg0 : tensor<32xf32>)
outs(%1 : tensor<f32>) {
- ^bb0(%a: f32, %b: f32): // no predecessors
+ ^bb0(%a: f32, %b: f32):
%3 = arith.addf %a, %b : f32
linalg.yield %3 : f32
} -> tensor<f32>
// CHECK-NEXT: %[[cmp:.+]] = "test.condition"() : () -> i1
// CHECK-NEXT: scf.condition(%[[cmp]]) %[[cmp]] : i1
// CHECK-NEXT: } do {
-// CHECK-NEXT: ^bb0(%arg0: i1): // no predecessors
+// CHECK-NEXT: ^bb0(%arg0: i1):
// CHECK-NEXT: "test.use"(%[[true]]) : (i1) -> ()
// CHECK-NEXT: scf.yield
// CHECK-NEXT: }
// CHECK-NEXT: %[[cmp:.*]] = "test.condition"(%[[arg2]]) : (i32) -> i1
// CHECK-NEXT: scf.condition(%[[cmp]]) %[[arg2]] : i32
// CHECK-NEXT: } do {
-// CHECK-NEXT: ^bb0(%[[post:.+]]: i32): // no predecessors
+// CHECK-NEXT: ^bb0(%[[post:.+]]: i32):
// CHECK-NEXT: %[[next:.+]] = "test.use"(%[[post]]) : (i32) -> i32
// CHECK-NEXT: scf.yield %[[next]] : i32
// CHECK-NEXT: }
// CHECK-NEXT: %{{.*}} = "test.get_some_value"() : () -> i64
// CHECK-NEXT: scf.condition(%[[cmp]]) %[[val]] : i32
// CHECK-NEXT: } do {
-// CHECK-NEXT: ^bb0(%[[arg:.*]]: i32): // no predecessors
+// CHECK-NEXT: ^bb0(%[[arg:.*]]: i32):
// CHECK-NEXT: "test.use"(%[[arg]]) : (i32) -> ()
// CHECK-NEXT: scf.yield
// CHECK-NEXT: }
// CHECK-NEXT: %[[cmp:.+]] = arith.cmpi ne, %[[val]], %arg0 : i32
// CHECK-NEXT: scf.condition(%[[cmp]]) %[[val]] : i32
// CHECK-NEXT: } do {
-// CHECK-NEXT: ^bb0(%arg1: i32): // no predecessors
+// CHECK-NEXT: ^bb0(%arg1: i32):
// CHECK-NEXT: "test.use"(%[[true]], %[[false]], %arg1) : (i1, i1, i32) -> ()
// CHECK-NEXT: scf.yield
// CHECK-NEXT: }
// CHECK-NEXT: %[[cmp:.+]] = arith.cmpi ne, %arg0, %[[val]] : i32
// CHECK-NEXT: scf.condition(%[[cmp]]) %[[val]] : i32
// CHECK-NEXT: } do {
-// CHECK-NEXT: ^bb0(%arg1: i32): // no predecessors
+// CHECK-NEXT: ^bb0(%arg1: i32):
// CHECK-NEXT: "test.use"(%[[true]], %[[false]], %arg1) : (i1, i1, i32) -> ()
// CHECK-NEXT: scf.yield
// CHECK-NEXT: }
// CHECK-NOT: @sub
func @inlined_if_fn(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<i1>) -> tensor<f32> {
%0 = "tosa.cond_if"(%arg2, %arg0, %arg1) ({
- ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
+ ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):
%1 = call @add(%arg3, %arg4) : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tosa.yield"(%1) : (tensor<f32>) -> ()
}, {
- ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
+ ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):
%1 = call @sub(%arg3, %arg4) : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tosa.yield"(%1) : (tensor<f32>) -> ()
}) : (tensor<i1>, tensor<f32>, tensor<f32>) -> tensor<f32>
// Check that calls are inlined and functions eliminated:
// CHECK-NOT: @while
%1:4 = "tosa.while_loop"(%arg0, %arg1, %arg2, %arg3) ({
- ^bb0(%arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<i32>, %arg7: tensor<10xi32>): // no predecessors
+ ^bb0(%arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<i32>, %arg7: tensor<10xi32>):
%2 = call @while_cond_40(%arg4, %arg5, %arg6, %arg7) : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>) -> tensor<i1>
"tosa.yield"(%2) : (tensor<i1>) -> ()
}, {
- ^bb0(%arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<i32>, %arg7: tensor<10xi32>): // no predecessors
+ ^bb0(%arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<i32>, %arg7: tensor<10xi32>):
%2:4 = call @while_body_50(%arg4, %arg5, %arg6, %arg7) : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>) -> (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>)
"tosa.yield"(%2#0, %2#1, %2#2, %2#3) : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>) -> ()
}) : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>) -> (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>)
// CHECK-LABEL: cond_if
func @test_cond_if(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<i1>) -> tensor<f32> {
%0 = "tosa.cond_if"(%arg2, %arg0, %arg1) ({
- ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
+ ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):
%1 = "tosa.add"(%arg3, %arg4) : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tosa.yield"(%1) : (tensor<f32>) -> ()
}, {
- ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
+ ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):
%1 = "tosa.sub"(%arg3, %arg4) : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tosa.yield"(%1) : (tensor<f32>) -> ()
}) : (tensor<i1>, tensor<f32>, tensor<f32>) -> tensor<f32>
func @test_while_loop(%arg0: tensor<10xi32>, %arg1: tensor<i32>) {
%0 = "tosa.const"() {value = dense<0> : tensor<i32>} : () -> tensor<i32>
%1:3 = "tosa.while_loop"(%0, %0, %arg0) ({
- ^bb0(%arg2: tensor<i32>, %arg3: tensor<i32>, %arg4: tensor<10xi32>): // no predecessors
+ ^bb0(%arg2: tensor<i32>, %arg3: tensor<i32>, %arg4: tensor<10xi32>):
%2 = "tosa.greater_equal"(%arg3, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i1>
%3 = "tosa.logical_not"(%2) : (tensor<i1>) -> tensor<i1>
"tosa.yield"(%3) : (tensor<i1>) -> ()
}, {
- ^bb0(%arg2: tensor<i32>, %arg3: tensor<i32>, %arg4: tensor<10xi32>): // no predecessors
+ ^bb0(%arg2: tensor<i32>, %arg3: tensor<i32>, %arg4: tensor<10xi32>):
%2 = "tosa.const"() {value = dense<1> : tensor<i32>} : () -> tensor<i32>
%3 = "tosa.add"(%arg3, %2) : (tensor<i32>, tensor<i32>) -> tensor<i32>
%4 = "tosa.reshape"(%2) {new_shape = [1]} : (tensor<i32>) -> tensor<1xi32>
// Region with single block and not terminator.
// CHECK: unregistered_without_terminator
"test.unregistered_without_terminator"() ({
- ^bb0: // no predecessors
+ ^bb0:
}) : () -> ()
// -----
%9 = tensor.extract_slice %arg1[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32>
%10 = tensor.cast %9 : tensor<2xf32> to tensor<?xf32>
%11 = linalg.pad_tensor %10 low[%c0] high[%c0] {
- ^bb0(%arg5: index): // no predecessors
+ ^bb0(%arg5: index):
linalg.yield %cst : f32
} : tensor<?xf32> to tensor<2xf32>
%12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor<?x2xf32>
%9 = tensor.extract_slice %arg0[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32>
%10 = tensor.cast %9 : tensor<2xf32> to tensor<?xf32>
%11 = linalg.pad_tensor %10 low[%c0] high[%c0] {
- ^bb0(%arg5: index): // no predecessors
+ ^bb0(%arg5: index):
linalg.yield %cst : f32
} : tensor<?xf32> to tensor<2xf32>
%12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor<?x2xf32>
%cst = arith.constant 2.3 : f32
%c0 = arith.constant 0 : index
%out = linalg.pad_tensor %dynamic low[%c0, %offset, %c0] high[%c0, %c0, %offset] {
- ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index): // no predecessors
+ ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index):
linalg.yield %cst : f32
} : tensor<1x?x3xf32> to tensor<1x?x?xf32>
%unranked = tensor.cast %out: tensor<1x?x?xf32> to tensor<*xf32>
op1 = Operation.create("custom.op1", regions=1)
block = op1.regions[0].blocks.append(i32, i32)
# CHECK: "custom.op1"() ({
- # CHECK: ^bb0(%arg0: si32, %arg1: si32): // no predecessors
+ # CHECK: ^bb0(%arg0: si32, %arg1: si32):
# CHECK: "custom.terminator"() : () -> ()
# CHECK: }) : () -> ()
terminator = Operation.create("custom.terminator")