From 1b434652c56704be90d01039f4329ea9320bc742 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik@google.com>
Date: Mon, 29 Aug 2022 15:43:20 -0700
Subject: [PATCH] [mlir][sparse] add more dimension level types and properties

We recently removed the singleton dimension level type (see the revision
https://reviews.llvm.org/D131002) since it was unimplemented but also
incomplete (properties were missing). This revision adds singleton back as an
extra dimension level type, together with properties ordered/not-ordered
and unique/not-unique. Even though still not lowered to actual code, this
provides a complete way of defining many more sparse storage schemes (in
the long run, we want to support even more dimension level types and properties
using the additional extensions proposed in [Chou]).

Note that the current solution of using suffixes for the properties is not
ideal, but keeps the extension relatively simple with respect to parsing and
printing. Furthermore, it is rather consistent with the TACO implementation
which uses things like Compressed-Unique as well. Nevertheless, we probably
want to separate dimension level types from properties when we add more types
and properties.

Reviewed By: Peiming

Differential Revision: https://reviews.llvm.org/D132897
---
 mlir/include/mlir-c/Dialect/SparseTensor.h         | 14 +++++---
 .../SparseTensor/IR/SparseTensorAttrDefs.td        | 39 ++++++++++++++++++----
 .../mlir/ExecutionEngine/SparseTensorUtils.h       | 10 ++++--
 mlir/lib/Bindings/Python/DialectSparseTensor.cpp   |  9 ++++-
 mlir/lib/CAPI/Dialect/SparseTensor.cpp             | 23 ++++++++++++-
 .../SparseTensor/IR/SparseTensorDialect.cpp        | 35 +++++++++++++++++++
 .../SparseTensor/Transforms/CodegenUtils.cpp       | 14 ++++++++
 .../Transforms/SparseTensorConversion.cpp          |  2 ++
 .../Dialect/SparseTensor/roundtrip_encoding.mlir   | 20 +++++++++++
 9 files changed, 150 insertions(+), 16 deletions(-)
diff --git a/mlir/include/mlir-c/Dialect/SparseTensor.h b/mlir/include/mlir-c/Dialect/SparseTensor.h
index ac2b8b6..9465f36 100644
--- a/mlir/include/mlir-c/Dialect/SparseTensor.h
+++ b/mlir/include/mlir-c/Dialect/SparseTensor.h
@@ -19,11 +19,8 @@ extern "C" {
 
 MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(SparseTensor, sparse_tensor);
 
-/// Dimension level types that define sparse tensors:
-///   - MLIR_SPARSE_TENSOR_DIM_LEVEL_DENSE - dimension is dense, every
-///   entry is stored
-///   - MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED - dimension is sparse,
-///   only nonzeros are stored (no duplicates).
+/// Dimension level types (and properties) that define sparse tensors.
+/// See the documentation in SparseTensorAttrDefs.td for their meaning.
 ///
 /// These correspond to SparseTensorEncodingAttr::DimLevelType in the C++ API.
 /// If updating, keep them in sync and update the static_assert in the impl
@@ -31,6 +28,13 @@ MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(SparseTensor, sparse_tensor);
 enum MlirSparseTensorDimLevelType {
   MLIR_SPARSE_TENSOR_DIM_LEVEL_DENSE,
   MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NO,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU_NO,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NO,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU_NO,
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
index d9c743e..b2c27df 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
@@ -16,7 +16,7 @@ include "mlir/IR/TensorEncoding.td"
 // All of the Tensor attributes will extend this class.
 class SparseTensor_Attr<string name,
                         list<Trait> traits = []>
-	: AttrDef<SparseTensor_Dialect, name, traits>;
+    : AttrDef<SparseTensor_Dialect, name, traits>;
 
 // Sparse tensor encoding attribute.
 def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
@@ -34,9 +34,21 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
     The attribute consists of the following fields.
     - Dimension level type for each dimension of a tensor type:
         - **dense** : dimension is dense, all entries along this dimension
-	  are stored.
+          are stored
         - **compressed** : dimension is sparse, only nonzeros along this dimensions
-	  are stored, without duplicates, i.e., compressed (unique).
+          are stored
+        - **singleton** : dimension stores individual indices with no siblings
+      By default, each dimension level type has the property of being unique
+      (no duplicates at that level) and ordered (indices appear sorted at that
+      level). The following two suffixes can be used to make the last two
+      dimension level types not-unique (duplicates may appear) and not-ordered
+      (indices may appear unsorted).
+        - **-nu** : not unique
+        - **-no** : not ordered
+      Currently, these suffixes, if present, should appear in this order.
+      In the future, we may introduce many more dimension level types and
+      properties, and separate specifying the two completely rather than
+      using this suffix mechanism.
     - Dimension ordering on the indices of this tensor type. Unlike dense
       storage, most sparse storage schemes do not provide fast random access.
       This affine map specifies the order of dimensions that should be supported
@@ -62,6 +74,12 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
     }>
     ... tensor<?xf32, #SparseVector> ...
 
+    // Sorted Coordinate Scheme.
+    #SortedCOO = #sparse_tensor.encoding<{
+      dimLevelType = [ "compressed-nu", "singleton" ]
+    }>
+    ... tensor<?x?xf64, #SortedCOO> ...
+
     // Doubly compressed sparse column storage with specific bitwidths.
     #DCSC = #sparse_tensor.encoding<{
       dimLevelType = [ "compressed", "compressed" ],
@@ -76,10 +94,10 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
   // Data in sparse tensor encoding.
   let parameters = (
     ins
-    // A dimension level type for each dimension of a tensor type.
+    // A dimension level type for each dimension of the tensor type.
     ArrayRefParameter<
       "SparseTensorEncodingAttr::DimLevelType",
-      "Per-dimension level type (dense or compressed)"
+      "per dimension level type"
       >: $dimLevelType,
     // A dimension order on the indices of this tensor type.
     // TODO: block structure with higher-dim inputs
@@ -94,9 +112,16 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
   let hasCustomAssemblyFormat = 1;
 
   let extraClassDeclaration = [{
-    // Dimension level types.
+    // Dimension level types. By default, each type has the unique and
+    // ordered properties. Alternative properties are indicated by
+    // Nu (not-unique) and No (not-ordered).
+    //
+    // TODO: separate type and property in encoding
+    //
     enum class DimLevelType {
-      Dense, Compressed
+      Dense,
+      Compressed, CompressedNu, CompressedNo, CompressedNuNo,
+      Singleton, SingletonNu, SingletonNo, SingletonNuNo,
     };
   }];
 }
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
index 2ed0002..4ea6697 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
@@ -109,7 +109,7 @@ enum class Action : uint32_t {
   kSparseToSparse = 3,
   kEmptyCOO = 4,
   kToCOO = 5,
-  kToIterator = 6
+  kToIterator = 6,
 };
 
 /// This enum mimics `SparseTensorEncodingAttr::DimLevelType` for
@@ -118,7 +118,13 @@ enum class Action : uint32_t {
 enum class DimLevelType : uint8_t {
   kDense = 0,
   kCompressed = 1,
-  kSingleton = 2
+  kCompressedNu = 2,
+  kCompressedNo = 3,
+  kCompressedNuNo = 4,
+  kSingleton = 5,
+  kSingletonNu = 6,
+  kSingletonNo = 7,
+  kSingletonNuNo = 8,
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp
index 49b4a89..ae9cfbb 100644
--- a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp
+++ b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp
@@ -18,7 +18,14 @@ using namespace mlir::python::adaptors;
 static void populateDialectSparseTensorSubmodule(const py::module &m) {
   py::enum_<MlirSparseTensorDimLevelType>(m, "DimLevelType", py::module_local())
       .value("dense", MLIR_SPARSE_TENSOR_DIM_LEVEL_DENSE)
-      .value("compressed", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED);
+      .value("compressed", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED)
+      .value("compressed-nu", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU)
+      .value("compressed-no", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NO)
+      .value("compressed-nu-no", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU_NO)
+      .value("singleton", MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON)
+      .value("singleton-nu", MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU)
+      .value("singleton-no", MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NO)
+      .value("singleton-nu-no", MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU_NO);
 
   mlir_attribute_subclass(m, "EncodingAttr",
                           mlirAttributeIsASparseTensorEncodingAttr)
diff --git a/mlir/lib/CAPI/Dialect/SparseTensor.cpp b/mlir/lib/CAPI/Dialect/SparseTensor.cpp
index b7b2fd5..e3e3290 100644
--- a/mlir/lib/CAPI/Dialect/SparseTensor.cpp
+++ b/mlir/lib/CAPI/Dialect/SparseTensor.cpp
@@ -25,7 +25,28 @@ static_assert(
             static_cast<int>(SparseTensorEncodingAttr::DimLevelType::Dense) &&
         static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED) ==
             static_cast<int>(
-                SparseTensorEncodingAttr::DimLevelType::Compressed),
+                SparseTensorEncodingAttr::DimLevelType::Compressed) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::CompressedNu) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NO) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::CompressedNo) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU_NO) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::CompressedNuNo) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::Singleton) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::SingletonNu) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NO) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::SingletonNo) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU_NO) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::SingletonNuNo),
     "MlirSparseTensorDimLevelType (C-API) and DimLevelType (C++) mismatch");
 
 bool mlirAttributeIsASparseTensorEncodingAttr(MlirAttribute attr) {
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 2ff7e13..8691b94 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -72,6 +72,20 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) {
           dlt.push_back(SparseTensorEncodingAttr::DimLevelType::Dense);
         } else if (strVal == "compressed") {
           dlt.push_back(SparseTensorEncodingAttr::DimLevelType::Compressed);
+        } else if (strVal == "compressed-nu") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::CompressedNu);
+        } else if (strVal == "compressed-no") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::CompressedNo);
+        } else if (strVal == "compressed-nu-no") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::CompressedNuNo);
+        } else if (strVal == "singleton") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::Singleton);
+        } else if (strVal == "singleton-nu") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::SingletonNu);
+        } else if (strVal == "singleton-no") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::SingletonNo);
+        } else if (strVal == "singleton-nu-no") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::SingletonNuNo);
         } else {
           parser.emitError(parser.getNameLoc(),
                            "unexpected dimension level type: ")
@@ -125,6 +139,27 @@ void SparseTensorEncodingAttr::print(AsmPrinter &printer) const {
     case DimLevelType::Compressed:
       printer << "\"compressed\"";
       break;
+    case DimLevelType::CompressedNu:
+      printer << "\"compressed-nu\"";
+      break;
+    case DimLevelType::CompressedNo:
+      printer << "\"compressed-no\"";
+      break;
+    case DimLevelType::CompressedNuNo:
+      printer << "\"compressed-nu-no\"";
+      break;
+    case DimLevelType::Singleton:
+      printer << "\"singleton\"";
+      break;
+    case DimLevelType::SingletonNu:
+      printer << "\"singleton-nu\"";
+      break;
+    case DimLevelType::SingletonNo:
+      printer << "\"singleton-no\"";
+      break;
+    case DimLevelType::SingletonNuNo:
+      printer << "\"singleton-nu-no\"";
+      break;
     }
     if (i != e - 1)
       printer << ", ";
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index d30a81d..9f9bd91 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -148,6 +148,20 @@ DimLevelType mlir::sparse_tensor::dimLevelTypeEncoding(
     return DimLevelType::kDense;
   case SparseTensorEncodingAttr::DimLevelType::Compressed:
     return DimLevelType::kCompressed;
+  case SparseTensorEncodingAttr::DimLevelType::CompressedNu:
+    return DimLevelType::kCompressedNu;
+  case SparseTensorEncodingAttr::DimLevelType::CompressedNo:
+    return DimLevelType::kCompressedNo;
+  case SparseTensorEncodingAttr::DimLevelType::CompressedNuNo:
+    return DimLevelType::kCompressedNuNo;
+  case SparseTensorEncodingAttr::DimLevelType::Singleton:
+    return DimLevelType::kSingleton;
+  case SparseTensorEncodingAttr::DimLevelType::SingletonNu:
+    return DimLevelType::kSingletonNu;
+  case SparseTensorEncodingAttr::DimLevelType::SingletonNo:
+    return DimLevelType::kSingletonNo;
+  case SparseTensorEncodingAttr::DimLevelType::SingletonNuNo:
+    return DimLevelType::kSingletonNuNo;
   }
   llvm_unreachable("Unknown SparseTensorEncodingAttr::DimLevelType");
 }
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 5bd10e8..7fab9b3 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -466,6 +466,8 @@ static bool canUseDirectConversion(
       if (alreadyCompressed)
         return false; // Dense after Compressed not yet supported.
       break;
+    default: // TODO: investigate
+      return false;
     }
   }
   return true;
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
index 33cec89..39faa50 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
@@ -42,3 +42,23 @@ func.func private @sparse_csc(tensor<?x?xf32, #CSC>)
 // CHECK-LABEL: func private @sparse_dcsc(
 // CHECK-SAME: tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], dimOrdering = affine_map<(d0, d1) -> (d1, d0)>, indexBitWidth = 64 }>>)
 func.func private @sparse_dcsc(tensor<?x?xf32, #DCSC>)
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu-no", "singleton-no" ]
+}>
+
+// CHECK-LABEL: func private @sparse_coo(
+// CHECK-SAME: tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu-no", "singleton-no" ] }>>)
+func.func private @sparse_coo(tensor<?x?xf32, #COO>)
+
+// -----
+
+#SortedCOO = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu", "singleton" ]
+}>
+
+// CHECK-LABEL: func private @sparse_sorted_coo(
+// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>>)
+func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>)
-- 
2.7.4