      matchPattern(hi, m_Constant(&hiInt)) &&
      matchPattern(step, m_Constant(&stepInt))) {
    if (((hiInt.getInt() - loInt.getInt()) % stepInt.getInt()) == 0)
-      return rewriter.create<vector::ConstantMaskOp>(
-          loc, mtp, rewriter.getI64ArrayAttr(codegen.curVecLength));
+      return rewriter.create<vector::BroadcastOp>(
+          loc, mtp, rewriter.create<ConstantIntOp>(loc, 1, 1));
  }
  // Otherwise, generate a vector mask that avoids overrunning the upperbound
  // during vector execution. Here we rely on subsequent loop optimizations to
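
When the trip count is known to be an exact multiple of the vector length, no partial iteration can occur, so the loop mask can simply be all-true. The rewritten branch materializes that mask by broadcasting an i1 constant instead of going through ConstantMaskOp as before. A rough sketch of the IR this branch emits, assuming a vector length of 16 (the value names are illustrative, not taken from the patch):

  %true = constant 1 : i1                               // ConstantIntOp(loc, 1, 1)
  %mask = vector.broadcast %true : i1 to vector<16xi1>  // every lane active

Because the mask is a constant with all lanes set, later canonicalization can be expected to fold the masked loads and stores in the loop body into ordinary vector memory operations.
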
static void genTensorStore(Merger &merger, CodeGen &codegen,
                           PatternRewriter &rewriter, linalg::GenericOp op,
                           unsigned tensor, Value rhs) {
+  Location loc = op.getLoc();
  // Test if this is a scalarized reduction.
  unsigned lhs = op.getNumShapedOperands() - 1;
  if (lhs == tensor && codegen.redVal) {
+    if (codegen.curVecLength > 1)
+      rhs = rewriter.create<SelectOp>(loc, codegen.curVecMask, rhs,
+                                      codegen.redVal);
    codegen.redVal = rhs;
    return;
  }
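
Inside a vectorized loop the running reduction value is a whole vector of partial sums, so the update has to respect the loop mask: lanes that fall beyond the upper bound must keep their previous accumulator value rather than contribute to the final sum. The added select does exactly that. As a sketch, the update sequence this produces in the loop body looks like the following (mirroring the CHECK-VEC1 lines of the new test further down; names are illustrative):

  %a = addf %red_in, %m : vector<16xf32>
  %s = select %mask, %a, %red_in : vector<16xi1>, vector<16xf32>
  scf.yield %s : vector<16xf32>
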
    unsigned idx = map.getDimPosition(i);
    args.push_back(codegen.loops[idx]); // universal dense index
  }
-  Location loc = op.getLoc();
  Value ptr = codegen.buffers[tensor];
  if (codegen.curVecLength > 1)
    genVectorStore(codegen, rewriter, rhs, ptr, args);
    return;
  codegen.redVal = merger.exp(codegen.redExp).val = Value(); // end chain
  unsigned lhs = op.getNumShapedOperands() - 1;
-  if (codegen.curVecLength > 1) {
+  if (red.getType().isa<VectorType>()) {
    // TODO: assumes + reductions for now
    codegen.curVecLength = 1;
    Value ld = genTensorLoad(merger, codegen, rewriter, op, codegen.redExp);
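
At the end of the reduction chain the accumulator may now be a vector rather than a scalar. In that case the partial sums are collapsed back into the scalar output with a horizontal reduction (per the TODO, only additive reductions are handled at this point). A sketch of the epilogue after the vector loop, matching what the new test checks for (names illustrative):

  %r = vector.reduction "add", %red, %x : vector<16xf32> into f32

Here %red stands for the vector of partial sums carried through iter_args and %x for the scalar value re-loaded from the output tensor, so any value already held by the output is folded into the result as well.
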
  return %0 : tensor<f32>
}
+//
+// CHECK-VEC1-LABEL: func @reduction_17
+// CHECK-VEC1-DAG: %[[c0:.*]] = constant 0 : index
+// CHECK-VEC1-DAG: %[[c16:.*]] = constant 16 : index
+// CHECK-VEC1-DAG: %[[c17:.*]] = constant 17 : index
+// CHECK-VEC1-DAG: %[[v0:.*]] = constant dense<0.000000e+00> : vector<16xf32>
+// CHECK-VEC1: %[[red:.*]] = scf.for %[[i:.*]] = %[[c0]] to %[[c17]] step %[[c16]] iter_args(%[[red_in:.*]] = %[[v0]]) -> (vector<16xf32>) {
+// CHECK-VEC1: %[[sub:.*]] = subi %[[c17]], %[[i]] : index
+// CHECK-VEC1: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<16xi1>
+// CHECK-VEC1: %[[la:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %{{.*}} : memref<17xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
+// CHECK-VEC1: %[[lb:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %{{.*}} : memref<17xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
+// CHECK-VEC1: %[[m:.*]] = mulf %[[la]], %[[lb]] : vector<16xf32>
+// CHECK-VEC1: %[[a:.*]] = addf %[[red_in]], %[[m]] : vector<16xf32>
+// CHECK-VEC1: %[[s:.*]] = select %[[mask]], %[[a]], %[[red_in]] : vector<16xi1>, vector<16xf32>
+// CHECK-VEC1: scf.yield %[[s]] : vector<16xf32>
+// CHECK-VEC1: }
+// CHECK-VEC1: %{{.*}} = vector.reduction "add", %[[red]], %{{.*}} : vector<16xf32> into f32
+// CHECK-VEC1: return
+//
+func @reduction_17(%arga: tensor<17xf32>, %argb: tensor<17xf32>, %argx: tensor<f32>) -> tensor<f32> {
+  %0 = linalg.generic #trait_reduction_d
+    ins(%arga, %argb: tensor<17xf32>, tensor<17xf32>)
+    outs(%argx: tensor<f32>) {
+      ^bb(%a: f32, %b: f32, %x: f32):
+        %0 = mulf %a, %b : f32
+        %1 = addf %x, %0 : f32
+        linalg.yield %1 : f32
+  } -> tensor<f32>
+  return %0 : tensor<f32>
+}
+
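
The trip count of 17 is deliberately not a multiple of the vector length: the scf.for in the CHECK lines runs two iterations, the mask covers min(17 - 0, 16) = 16 lanes on the first pass and 17 - 16 = 1 lane on the second, and the select keeps the remaining 15 accumulator lanes untouched before vector.reduction collapses all of them into the scalar result.
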
#trait_mul_ds = {
indexing_maps = [
affine_map<(i,j) -> (i,j)>, // a