-// RUN: mlir-opt %s -sparsification | \
+// RUN: mlir-opt %s --linalg-generalize-named-ops \
+// RUN: --linalg-fuse-elementwise-ops \
+// RUN: --sparsification | \
// RUN: FileCheck %s --check-prefix=CHECK-SPARSE
-// RUN: mlir-opt %s -sparsification -sparse-tensor-conversion | \
+// RUN: mlir-opt %s --linalg-generalize-named-ops \
+// RUN: --linalg-fuse-elementwise-ops \
+// RUN: --sparsification --sparse-tensor-conversion --cse | \
// RUN: FileCheck %s --check-prefix=CHECK-CONVERT
+#CSR = #sparse_tensor.encoding<{
+ dimLevelType = [ "dense", "compressed" ]
+}>
+
+#CSC = #sparse_tensor.encoding<{
+ dimLevelType = [ "dense", "compressed" ],
+ dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
#DCSC = #sparse_tensor.encoding<{
dimLevelType = [ "compressed", "compressed" ],
dimOrdering = affine_map<(i,j) -> (j,i)>
//
// CHECK-SPARSE-LABEL: func @kernel(
// CHECK-SPARSE: %[[A:.*]], %[[B:.*]], %[[C:.*]], %{{.*}} = sparse_tensor.expand
-// CHECK-SPARSE: scf.for
-// CHECK-SPARSE: scf.for
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: }
+// CHECK-SPARSE: }
// CHECK-SPARSE: sparse_tensor.compress %{{.*}}, %{{.*}}, %[[A]], %[[B]], %[[C]]
// CHECK-SPARSE: %[[RET:.*]] = sparse_tensor.load %{{.*}} hasInserts
// CHECK-SPARSE: return %[[RET]]
//
// CHECK-CONVERT-LABEL: func @kernel(
+// CHECK-CONVERT: %[[C:.*]] = arith.constant 0 : index
// CHECK-CONVERT: %{{.*}} = call @sparseDimSize
-// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize
+// CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor
+// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C]])
// CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
// CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
// CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<?xf64>)
// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
-// CHECK-CONVERT: scf.for
-// CHECK-CONVERT: scf.for
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: }
+// CHECK-CONVERT: }
// CHECK-CONVERT: call @expInsertF64
// CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
// CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
} -> tensor<?xf64, #SV>
return %0 : tensor<?xf64, #SV>
}
+
+//
+// CHECK-SPARSE-LABEL: func @matmul1(
+// CHECK-SPARSE-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-SPARSE-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-SPARSE-DAG: %[[C8:.*]] = arith.constant 8 : index
+// CHECK-SPARSE: scf.for %{{.*}} = %[[C0]] to %[[C8]] step %[[C1]] {
+// CHECK-SPARSE: %[[A:.*]], %[[B:.*]], %[[C:.*]], %{{.*}} = sparse_tensor.expand
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: }
+// CHECK-SPARSE: }
+// CHECK-SPARSE: sparse_tensor.compress %{{.*}}, %{{.*}}, %[[A]], %[[B]], %[[C]]
+// CHECK-SPARSE: }
+// CHECK-SPARSE: %[[RET:.*]] = sparse_tensor.load %{{.*}} hasInserts
+// CHECK-SPARSE: return %[[RET]]
+//
+// CHECK-CONVERT-LABEL: func @matmul1(
+// CHECK-CONVERT-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-CONVERT-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-CONVERT-DAG: %[[C8:.*]] = arith.constant 8 : index
+// CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor
+// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C1]])
+// CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
+// CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
+// CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
+// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<?xf64>)
+// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
+// CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C8]] step %[[C1]] {
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: }
+// CHECK-CONVERT: }
+// CHECK-CONVERT: call @expInsertF64
+// CHECK-CONVERT: }
+// CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
+// CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
+// CHECK-CONVERT: memref.dealloc %[[C]] : memref<?xindex>
+// CHECK-CONVERT: call @endInsert
+//
+// Sparse kernel: computes C(8x4) = A(8x2) * B(2x4) with all operands in
+// row-major CSR format; the named linalg.matmul is generalized and then
+// sparsified by the RUN pipelines above (see CHECK-SPARSE / CHECK-CONVERT).
+func.func @matmul1(%A: tensor<8x2xf64, #CSR>,
+                   %B: tensor<2x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> {
+  // Output tensor is materialized for insertion by the sparse compiler.
+  %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSR>
+  %D = linalg.matmul
+    ins(%A, %B: tensor<8x2xf64, #CSR>, tensor<2x4xf64, #CSR>)
+    outs(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR>
+  return %D: tensor<8x4xf64, #CSR>
+}
+
+//
+// CHECK-SPARSE-LABEL: func @matmul2(
+// CHECK-SPARSE-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-SPARSE-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-SPARSE-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-SPARSE: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C1]] {
+// CHECK-SPARSE: %[[A:.*]], %[[B:.*]], %[[C:.*]], %{{.*}} = sparse_tensor.expand
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: scf.for {{.*}} {
+// CHECK-SPARSE: }
+// CHECK-SPARSE: }
+// CHECK-SPARSE: sparse_tensor.compress %{{.*}}, %{{.*}}, %[[A]], %[[B]], %[[C]]
+// CHECK-SPARSE: }
+// CHECK-SPARSE: %[[RET:.*]] = sparse_tensor.load %{{.*}} hasInserts
+// CHECK-SPARSE: return %[[RET]]
+//
+// CHECK-CONVERT-LABEL: func @matmul2(
+// CHECK-CONVERT-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-CONVERT-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-CONVERT-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor
+// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C1]])
+// CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
+// CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
+// CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
+// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<?xf64>)
+// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
+// CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C1]] {
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: scf.for {{.*}} {
+// CHECK-CONVERT: }
+// CHECK-CONVERT: }
+// CHECK-CONVERT: call @expInsertF64
+// CHECK-CONVERT: }
+// CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
+// CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
+// CHECK-CONVERT: memref.dealloc %[[C]] : memref<?xindex>
+// CHECK-CONVERT: call @endInsert
+//
+// Sparse kernel: computes C(8x4) = A(8x2) * B(2x4) with all operands in
+// column-major CSC format (note the dimOrdering in #CSC); exercises access
+// pattern expansion along the column dimension (see CHECK lines above).
+func.func @matmul2(%A: tensor<8x2xf64, #CSC>,
+                   %B: tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> {
+  // Output tensor is materialized for insertion by the sparse compiler.
+  %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC>
+  %D = linalg.matmul
+    ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>)
+    outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
+  return %D: tensor<8x4xf64, #CSC>
+}
--- /dev/null
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+#CSC = #sparse_tensor.encoding<{
+ dimLevelType = [ "dense", "compressed" ],
+ dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
+module {
+  //
+  // Column-wise storage forces the ijk loop to permute into jki
+  // so that access pattern expansion (workspace) needs to be
+  // done along dimension with size 8.
+  //
+  func.func @matmul(%A: tensor<8x2xf64, #CSC>,
+                    %B: tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> {
+    %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC>
+    %D = linalg.matmul
+      ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>)
+      outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
+    return %D: tensor<8x4xf64, #CSC>
+  }
+
+  //
+  // Main driver: builds two dense matrices, converts them to CSC,
+  // runs the sparse matmul kernel, and prints the densified result
+  // for FileCheck verification.
+  //
+  func.func @entry() {
+    %c0 = arith.constant 0 : index
+    // Padding value for the transfer_read below (unused when in-bounds).
+    %d1 = arith.constant -1.0 : f64
+
+    // Initialize the dense input matrices.
+    %da = arith.constant dense<[
+        [ 1.1, 2.1 ],
+        [ 1.2, 2.2 ],
+        [ 1.3, 2.3 ],
+        [ 1.4, 2.4 ],
+        [ 1.5, 2.5 ],
+        [ 1.6, 2.6 ],
+        [ 1.7, 2.7 ],
+        [ 1.8, 2.8 ]
+    ]> : tensor<8x2xf64>
+    %db = arith.constant dense<[
+        [ 10.1, 11.1, 12.1, 13.1 ],
+        [ 10.2, 11.2, 12.2, 13.2 ]
+    ]> : tensor<2x4xf64>
+
+    // Convert the dense matrices to CSC format.
+    %x1 = sparse_tensor.convert %da : tensor<8x2xf64> to tensor<8x2xf64, #CSC>
+    %x2 = sparse_tensor.convert %db : tensor<2x4xf64> to tensor<2x4xf64, #CSC>
+
+    // Call the sparse matmul kernel with the CSC operands.
+    %x3 = call @matmul(%x1, %x2)
+       : (tensor<8x2xf64, #CSC>,
+          tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
+
+    //
+    // CHECK:      ( ( 32.53, 35.73, 38.93, 42.13 ),
+    // CHECK-SAME:   ( 34.56, 37.96, 41.36, 44.76 ),
+    // CHECK-SAME:   ( 36.59, 40.19, 43.79, 47.39 ),
+    // CHECK-SAME:   ( 38.62, 42.42, 46.22, 50.02 ),
+    // CHECK-SAME:   ( 40.65, 44.65, 48.65, 52.65 ),
+    // CHECK-SAME:   ( 42.68, 46.88, 51.08, 55.28 ),
+    // CHECK-SAME:   ( 44.71, 49.11, 53.51, 57.91 ),
+    // CHECK-SAME:   ( 46.74, 51.34, 55.94, 60.54 ) )
+    //
+    // Densify the result and print it so FileCheck can match it above.
+    %xc = sparse_tensor.convert %x3 : tensor<8x4xf64, #CSC> to tensor<8x4xf64>
+    %xv = vector.transfer_read %xc[%c0, %c0], %d1 : tensor<8x4xf64>, vector<8x4xf64>
+    vector.print %xv : vector<8x4xf64>
+
+    // Release the resources.
+    bufferization.dealloc_tensor %x1 : tensor<8x2xf64, #CSC>
+    bufferization.dealloc_tensor %x2 : tensor<2x4xf64, #CSC>
+    bufferization.dealloc_tensor %x3 : tensor<8x4xf64, #CSC>
+
+    return
+  }
+}