[mlir][Linalg] Enable vectorization of 'mul', 'and', 'or' and 'xor' reductions

author Diego Caballero <diegocaballero@google.com>

Tue, 12 Oct 2021 20:46:08 +0000 (20:46 +0000)

committer Diego Caballero <diegocaballero@google.com>

Tue, 12 Oct 2021 21:08:23 +0000 (21:08 +0000)
author Diego Caballero <diegocaballero@google.com>
Tue, 12 Oct 2021 20:46:08 +0000 (20:46 +0000)
committer Diego Caballero <diegocaballero@google.com>
Tue, 12 Oct 2021 21:08:23 +0000 (21:08 +0000)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp

index cbe7e3f..a1ea542 100644 (file)
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -122,10 +122,14 @@ getKindForOp(Operation *reductionOp) {
    return llvm::TypeSwitch<Operation *, llvm::Optional<vector::CombiningKind>>(
               reductionOp)
        .Case<AddIOp, AddFOp>([&](auto op) { return vector::CombiningKind::ADD; })
+      .Case<AndOp>([&](auto op) { return vector::CombiningKind::AND; })
        .Case<MaxSIOp>([&](auto op) { return vector::CombiningKind::MAXSI; })
        .Case<MaxFOp>([&](auto op) { return vector::CombiningKind::MAXF; })
        .Case<MinSIOp>([&](auto op) { return vector::CombiningKind::MINSI; })
        .Case<MinFOp>([&](auto op) { return vector::CombiningKind::MINF; })
+      .Case<MulIOp, MulFOp>([&](auto op) { return vector::CombiningKind::MUL; })
+      .Case<OrOp>([&](auto op) { return vector::CombiningKind::OR; })
+      .Case<XOrOp>([&](auto op) { return vector::CombiningKind::XOR; })
        .Default([&](auto op) { return llvm::None; });
  }
  
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir

index 1e6c801..d3aa8c3 100644 (file)
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -822,9 +822,9 @@ func @red_max_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
    // CHECK: maxf {{.*}} : vector<4x4xf32>
    // CHECK: vector.multi_reduction #vector.kind<maxf>, {{.*}} [1] : vector<4x4xf32> to vector<4xf32>
    // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
-  %minf32 = constant -3.40282e+38 : f32
+  %ident = constant -3.40282e+38 : f32
    %init = linalg.init_tensor [4] : tensor<4xf32>
-  %fill = linalg.fill(%minf32, %init) : f32, tensor<4xf32> -> tensor<4xf32>
+  %fill = linalg.fill(%ident, %init) : f32, tensor<4xf32> -> tensor<4xf32>
    %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                            affine_map<(d0, d1) -> (d0)>],
                           iterator_types = ["parallel", "reduction"]}
@@ -863,6 +863,106 @@ func @red_min_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
  
  // -----
  
+// CHECK-LABEL:   func @red_mul_2d(
+func @red_mul_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
+  // CHECK: linalg.init_tensor [4] : tensor<4xf32>
+  // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
+  // CHECK: vector.transfer_read {{.*}} : tensor<4x4xf32>, vector<4x4xf32>
+  // CHECK: vector.transfer_read {{.*}} : tensor<4xf32>, vector<4x4xf32>
+  // CHECK: mulf {{.*}} : vector<4x4xf32>
+  // CHECK: vector.multi_reduction #vector.kind<mul>, {{.*}} [1] : vector<4x4xf32> to vector<4xf32>
+  // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
+  %ident = constant 1.0 : f32
+  %init = linalg.init_tensor [4] : tensor<4xf32>
+  %fill = linalg.fill(%ident, %init) : f32, tensor<4xf32> -> tensor<4xf32>
+  %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+                                          affine_map<(d0, d1) -> (d0)>],
+                         iterator_types = ["parallel", "reduction"]}
+                         ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
+  ^bb0(%in0: f32, %out0: f32):  // no predecessors
+    %mul = mulf %in0, %out0 : f32
+    linalg.yield %mul : f32
+  } -> tensor<4xf32>
+  return %red : tensor<4xf32>
+}
+
+// -----
+
+// CHECK-LABEL:   func @red_or_2d(
+func @red_or_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
+  // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+  // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+  // CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
+  // CHECK: vector.transfer_read {{.*}} : tensor<4xi1>, vector<4x4xi1>
+  // CHECK: or {{.*}} : vector<4x4xi1>
+  // CHECK: vector.multi_reduction #vector.kind<or>, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
+  // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+  %ident = constant false
+  %init = linalg.init_tensor [4] : tensor<4xi1>
+  %fill = linalg.fill(%ident, %init) : i1, tensor<4xi1> -> tensor<4xi1>
+  %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+                                          affine_map<(d0, d1) -> (d0)>],
+                         iterator_types = ["parallel", "reduction"]}
+                         ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+  ^bb0(%in0: i1, %out0: i1):  // no predecessors
+    %or = or %in0, %out0 : i1
+    linalg.yield %or : i1
+  } -> tensor<4xi1>
+  return %red : tensor<4xi1>
+}
+
+// -----
+
+// CHECK-LABEL:   func @red_and_2d(
+func @red_and_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
+  // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+  // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+  // CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
+  // CHECK: vector.transfer_read {{.*}} : tensor<4xi1>, vector<4x4xi1>
+  // CHECK: and {{.*}} : vector<4x4xi1>
+  // CHECK: vector.multi_reduction #vector.kind<and>, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
+  // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+  %ident = constant true
+  %init = linalg.init_tensor [4] : tensor<4xi1>
+  %fill = linalg.fill(%ident, %init) : i1, tensor<4xi1> -> tensor<4xi1>
+  %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+                                          affine_map<(d0, d1) -> (d0)>],
+                         iterator_types = ["parallel", "reduction"]}
+                         ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+  ^bb0(%in0: i1, %out0: i1):  // no predecessors
+    %and = and %in0, %out0 : i1
+    linalg.yield %and : i1
+  } -> tensor<4xi1>
+  return %red : tensor<4xi1>
+}
+
+// -----
+
+// CHECK-LABEL:   func @red_xor_2d(
+func @red_xor_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
+  // CHECK: linalg.init_tensor [4] : tensor<4xi1>
+  // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+  // CHECK: vector.transfer_read {{.*}} : tensor<4x4xi1>, vector<4x4xi1>
+  // CHECK: vector.transfer_read {{.*}} : tensor<4xi1>, vector<4x4xi1>
+  // CHECK: xor {{.*}} : vector<4x4xi1>
+  // CHECK: vector.multi_reduction #vector.kind<xor>, {{.*}} [1] : vector<4x4xi1> to vector<4xi1>
+  // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
+  %ident = constant false
+  %init = linalg.init_tensor [4] : tensor<4xi1>
+  %fill = linalg.fill(%ident, %init) : i1, tensor<4xi1> -> tensor<4xi1>
+  %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+                                          affine_map<(d0, d1) -> (d0)>],
+                         iterator_types = ["parallel", "reduction"]}
+                         ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+  ^bb0(%in0: i1, %out0: i1):  // no predecessors
+    %xor = xor %in0, %out0 : i1
+    linalg.yield %xor : i1
+  } -> tensor<4xi1>
+  return %red : tensor<4xi1>
+}
+
+// -----
+
  // CHECK-DAG: #[[$M5:.*]] = affine_map<(d0, d1) -> (d0, 0)>
  
  // CHECK-LABEL:   func @explicit_broadcast(
author	Diego Caballero <diegocaballero@google.com>
	Tue, 12 Oct 2021 20:46:08 +0000 (20:46 +0000)
committer	Diego Caballero <diegocaballero@google.com>
	Tue, 12 Oct 2021 21:08:23 +0000 (21:08 +0000)
mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp		patch | blob | history
mlir/test/Dialect/Linalg/vectorization.mlir		patch | blob | history