From: Peiming Liu <peiming@google.com>
Date: Tue, 21 Feb 2023 21:26:46 +0000 (+0000)
Subject: [mlir][sparse] revert optimization for dense->csc conversion.
X-Git-Tag: upstream/17.0.6~16850
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b7d86f3f1cfc8f7a0ac9e83d87767a4dac3832a0;p=platform%2Fupstream%2Fllvm.git

[mlir][sparse] revert optimization for dense->csc conversion.

Eliminates the sort seems make the whole conversion slower (probably because loop rotation leads to bad locality).

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D144517
---

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 2051ac8..ef78977 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -669,16 +669,19 @@ private:
     }
 
     const auto encDst = dstTp.getEncoding();
-    // We don't need a temporary COO tensor for dense => sparse conversion.
-    const RankedTensorType bufferTp = dstTp.getRankedTensorType();
+    // We don't need a temporary COO tensor if the destination has an identity
+    // ordering. Otherwise, we use the destination ordering for the temporary
+    // COO tensor.
+    // TODO: enhance foreachOp to take ordering to remove the need of a
+    // temporary COO tensor here.
+    const RankedTensorType bufferTp = dstTp.isIdentity()
+                                          ? dstTp.getRankedTensorType()
+                                          : getUnorderedCOOFromTypeWithOrdering(
+                                                dstTp, dstTp.getDimToLvlMap());
     auto buffer =
         rewriter.create<AllocTensorOp>(loc, bufferTp, dynSizes).getResult();
-    AffineMapAttr foreachOrder = nullptr;
-    if (encDst.getDimOrdering())
-      foreachOrder = AffineMapAttr::get(encDst.getDimOrdering());
-
     auto foreachOp = rewriter.create<ForeachOp>(
-        loc, src, buffer, foreachOrder,
+        loc, src, buffer,
         [&](OpBuilder &builder, Location loc, ValueRange indices, Value v,
             ValueRange reduc) {
           Value input = reduc.front();
@@ -706,8 +709,14 @@ private:
         });
     rewriter.setInsertionPointAfter(op);
     src = rewriter.create<LoadOp>(loc, foreachOp.getResult(0), true);
+    if (bufferTp != dstTp) {
+      rewriter.replaceOpWithNewOp<ConvertOp>(op, dstTp.getRankedTensorType(),
+                                             src);
+      rewriter.create<DeallocTensorOp>(loc, src);
+    } else {
+      rewriter.replaceOp(op, src);
+    }
 
-    rewriter.replaceOp(op, src);
     return success();
   }
 
diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
index 92f6376..2646b2d 100644
--- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
@@ -183,17 +183,30 @@ func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{
   return %1 : tensor<8x7xf32, #CSR>
 }
 
-// CHECK-RWT-LABEL:   func.func @sparse_constant_csc() -> tensor<8x7xf32,
-//       CHECK-RWT:    %[[VAL_0:.*]] = arith.constant sparse<{{\[\[}}0, 0], [1, 6]], [1.000000e+00, 5.000000e+00]> : tensor<8x7xf32>
-//       CHECK-RWT:    %[[VAL_1:.*]] = bufferization.alloc_tensor() :
-//       CHECK-RWT:    %[[VAL_2:.*]] = sparse_tensor.foreach in %[[VAL_0]] init(%[[VAL_1]]) {order = #map} : tensor<8x7xf32>,
-//       CHECK-RWT:    ^bb0(%[[VAL_3:.*]]: index, %[[VAL_4:.*]]: index, %[[VAL_5:.*]]: f32, %[[VAL_6:.*]]: tensor
-//       CHECK-RWT:      %[[VAL_7:.*]] = sparse_tensor.insert %[[VAL_5]] into %[[VAL_6]]{{\[}}%[[VAL_4]], %[[VAL_3]]] :
-//       CHECK-RWT:      sparse_tensor.yield %[[VAL_7]] :
-//       CHECK-RWT:    }
-//       CHECK-RWT:    %[[VAL_8:.*]] = sparse_tensor.load %[[VAL_9:.*]] hasInserts :
-//       CHECK-RWT:    return %[[VAL_8]] :
-//       CHECK-RWT:  }
+// CHECK-RWT-LABEL:   func.func @sparse_constant_csc() -> tensor<8x7xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], dimOrdering = affine_map<(d0, d1) -> (d1, d0)> }>> {
+//       CHECK-RWT:     %[[F0:.*]] = arith.constant sparse<{{\[\[}}0, 0], [1, 6]], [1.000000e+00, 5.000000e+00]> : tensor<8x7xf32>
+//       CHECK-RWT:     %[[T0:.*]] = bufferization.alloc_tensor()
+//       CHECK-RWT:     %[[T1:.*]] = sparse_tensor.foreach in %[[F0]] init(%[[T0]])
+//       CHECK-RWT:     ^bb0(%[[L0I0:.*]]: index, %[[L0I1:.*]]: index, %[[L0V:.*]]: f32, %[[L0T:.*]]: tensor
+//       CHECK-RWT:       %[[L0T2:.*]] = sparse_tensor.insert %[[L0V]] into %[[L0T]]{{\[}}%[[L0I1]], %[[L0I0]]]
+//       CHECK-RWT:       sparse_tensor.yield %[[L0T2]]
+//       CHECK-RWT:     }
+//       CHECK-RWT:     %[[COO:.*]] = sparse_tensor.load %[[T1]] hasInserts
+//       CHECK-RWT:     %[[NNZ:.*]] = sparse_tensor.number_of_entries %[[COO]]
+//       CHECK-RWT:     %[[V:.*]] = sparse_tensor.values %[[COO]]
+//       CHECK-RWT:     %[[I:.*]] = sparse_tensor.indices_buffer %[[COO]]
+//       CHECK-RWT:     sparse_tensor.sort_coo hybrid_quick_sort %[[NNZ]], %[[I]] jointly %[[V]] {nx = 2 : index, ny = 0 : index}
+//       CHECK-RWT:     %[[T3:.*]] = bufferization.alloc_tensor()
+//       CHECK-RWT:     %[[T4:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[T3]])
+//       CHECK-RWT:     ^bb0(%[[L1I0:.*]]: index, %[[L1I1:.*]]: index, %[[L1V:.*]]: f32, %[[L1T:.*]]: tensor
+//       CHECK-RWT:       %[[L1T2:.*]] = sparse_tensor.insert %[[L1V]] into %[[L1T]]{{\[}}%[[L1I1]], %[[L1I0]]]
+//       CHECK-RWT:       sparse_tensor.yield %[[L1T2]]
+//       CHECK-RWT:     }
+//       CHECK-RWT:     %[[T5:.*]] = sparse_tensor.load %[[T4]] hasInserts
+//       CHECK-RWT:     %[[T6:.*]] = sparse_tensor.convert %[[T5]]
+//       CHECK-RWT:     bufferization.dealloc_tensor %[[COO]]
+//       CHECK-RWT:     return %[[T6]]
+//       CHECK-RWT:   }
 func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{
   // Initialize a tensor.
   %0 = arith.constant sparse<[[0, 0], [1, 6]], [1.0, 5.0]> : tensor<8x7xf32>