From 0bfaa301e28dacc9b79ecdff12254439e2e8458a Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Fri, 16 Sep 2022 17:55:50 -0700
Subject: [PATCH] [mlir][sparse] implement singleton dimension level type

This is a first step towards fully implementing the new dimension level
types and properties, illustrating with a fully functional sorted COO
of any dimension. Note that the sparsification part is pretty complete.
The required parts in the runtime support library have been kept to a
minimum, to avoid huge conflicts with Wren's ongoing refactoring. The
missing parts will be filled in later.

Reviewed By: Peiming

Differential Revision: https://reviews.llvm.org/D134096
---
 mlir/test/Dialect/SparseTensor/sorted_coo.mlir |  96 +++++++++
 .../SparseTensor/CPU/sparse_sorted_coo.mlir    | 230 +++++++++++++++++++++
 2 files changed, 326 insertions(+)
 create mode 100644 mlir/test/Dialect/SparseTensor/sorted_coo.mlir
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir

diff --git a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
new file mode 100644
index 0000000..f77abfa
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
@@ -0,0 +1,96 @@
+// RUN: mlir-opt %s -sparsification | FileCheck %s
+
+#SortedCOO = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu", "singleton" ]
+}>
+
+#trait_scale = {
+  indexing_maps = [
+    affine_map<(i,j) -> (i,j)>  // X (out)
+  ],
+  iterator_types = ["parallel", "parallel"],
+  doc = "X(i,j) = X(i,j) * 2.0"
+}
+
+#trait_matvec = {
+  indexing_maps = [
+    affine_map<(i,j) -> (i,j)>, // A
+    affine_map<(i,j) -> (j)>,   // b
+    affine_map<(i,j) -> (i)>    // x (out)
+  ],
+  iterator_types = ["parallel","reduction"],
+  doc = "x(i) += A(i,j) * b(j)"
+}
+
+//
+// Two kernels that operate on SortedCOO format.
+//
+
+// CHECK-LABEL: func.func @sparse_scale(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>>) -> tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> {
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 2.000000e+00 : f32
+// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xf32>
+// CHECK: %[[VAL_6:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_1]]] : memref<?xindex>
+// CHECK: %[[VAL_7:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_2]] {
+// CHECK:   %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_8]]] : memref<?xf32>
+// CHECK:   %[[VAL_10:.*]] = arith.mulf %[[VAL_9]], %[[VAL_3]] : f32
+// CHECK:   memref.store %[[VAL_10]], %[[VAL_5]]{{\[}}%[[VAL_8]]] : memref<?xf32>
+// CHECK: }
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>>
+// CHECK: return %[[VAL_11]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>>
+// CHECK: }
+func.func @sparse_scale(%argx: tensor<?x?xf32, #SortedCOO>) -> tensor<?x?xf32, #SortedCOO> {
+  %c = arith.constant 2.0 : f32
+  %0 = linalg.generic #trait_scale
+    outs(%argx: tensor<?x?xf32, #SortedCOO>) {
+      ^bb(%x: f32):
+        %1 = arith.mulf %x, %c : f32
+        linalg.yield %1 : f32
+  } -> tensor<?x?xf32, #SortedCOO>
+  return %0 : tensor<?x?xf32, #SortedCOO>
+}
+
+// CHECK-LABEL: func.func @matvec(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<64xf64>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref<?xf64>
+// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
+// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
+// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] {
+// CHECK:   %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref<?xindex>
+// CHECK:   %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf64>
+// CHECK:   %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref<?xindex>
+// CHECK:   %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_13]]] : memref<?xf64>
+// CHECK:   %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_16]]] : memref<64xf64>
+// CHECK:   %[[VAL_19:.*]] = arith.mulf %[[VAL_17]], %[[VAL_18]] : f64
+// CHECK:   %[[VAL_20:.*]] = arith.addf %[[VAL_15]], %[[VAL_19]] : f64
+// CHECK:   memref.store %[[VAL_20]], %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf64>
+// CHECK: }
+// CHECK: %[[VAL_21:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32xf64>
+// CHECK: return %[[VAL_21]] : tensor<32xf64>
+// CHECK: }
+func.func @matvec(%arga: tensor<32x64xf64, #SortedCOO>,
+                  %argb: tensor<64xf64>,
+                  %argx: tensor<32xf64>) -> tensor<32xf64> {
+  %0 = linalg.generic #trait_matvec
+    ins(%arga, %argb : tensor<32x64xf64, #SortedCOO>, tensor<64xf64>)
+    outs(%argx: tensor<32xf64>) {
+      ^bb(%A: f64, %b: f64, %x: f64):
+        %0 = arith.mulf %A, %b : f64
+        %1 = arith.addf %x, %0 : f64
+        linalg.yield %1 : f64
+  } -> tensor<32xf64>
+  return %0 : tensor<32xf64>
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
new file mode 100644
index 0000000..06c7c89
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
@@ -0,0 +1,230 @@
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
+// RUN: TENSOR1="%mlir_src_dir/test/Integration/data/mttkrp_b.tns" \
+// RUN: mlir-cpu-runner \
+// RUN:  -e entry -entry-point-result=void \
+// RUN:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+!Filename = !llvm.ptr<i8>
+
+#SortedCOO = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu", "singleton" ]
+}>
+
+#SortedCOOPermuted = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu", "singleton" ],
+  dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
+#SortedCOO3D = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu", "singleton-nu", "singleton" ]
+}>
+
+#SortedCOO3DPermuted = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu", "singleton-nu", "singleton" ],
+  dimOrdering = affine_map<(i,j,k) -> (k,i,j)>
+}>
+
+#trait_scale = {
+  indexing_maps = [
+    affine_map<(i,j) -> (i,j)>  // X (out)
+  ],
+  iterator_types = ["parallel", "parallel"],
+  doc = "X(i,j) = X(i,j) * 2.0"
+}
+
+//
+// Tests reading a matrix/tensor from file into sorted COO formats
+// as well as applying various operations to these formats.
+//
+module {
+
+  func.func private @getTensorFilename(index) -> (!Filename)
+
+  //
+  // A kernel that scales a sparse matrix A by a factor of 2.0.
+  //
+  func.func @sparse_scale(%argx: tensor<?x?xf64, #SortedCOO>)
+      -> tensor<?x?xf64, #SortedCOO> {
+    %c = arith.constant 2.0 : f64
+    %0 = linalg.generic #trait_scale
+      outs(%argx: tensor<?x?xf64, #SortedCOO>) {
+        ^bb(%x: f64):
+          %1 = arith.mulf %x, %c : f64
+          linalg.yield %1 : f64
+    } -> tensor<?x?xf64, #SortedCOO>
+    return %0 : tensor<?x?xf64, #SortedCOO>
+  }
+
+  func.func @dumpi(%arg0: memref<?xindex>) {
+    %c0 = arith.constant 0 : index
+    %v = vector.transfer_read %arg0[%c0], %c0: memref<?xindex>, vector<20xindex>
+    vector.print %v : vector<20xindex>
+    return
+  }
+
+  func.func @dumpf(%arg0: memref<?xf64>) {
+    %c0 = arith.constant 0 : index
+    %nan = arith.constant 0x7FF0000001000000 : f64
+    %v = vector.transfer_read %arg0[%c0], %nan: memref<?xf64>, vector<20xf64>
+    vector.print %v : vector<20xf64>
+    return
+  }
+
+  func.func @entry() {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+
+    %fileName0 = call @getTensorFilename(%c0) : (index) -> (!Filename)
+    %fileName1 = call @getTensorFilename(%c1) : (index) -> (!Filename)
+
+    // Read the sparse tensors from file, construct sparse storage.
+    %0 = sparse_tensor.new %fileName0 : !Filename to tensor<?x?xf64, #SortedCOO>
+    %1 = sparse_tensor.new %fileName0 : !Filename to tensor<?x?xf64, #SortedCOOPermuted>
+    %2 = sparse_tensor.new %fileName1 : !Filename to tensor<?x?x?xf64, #SortedCOO3D>
+    %3 = sparse_tensor.new %fileName1 : !Filename to tensor<?x?x?xf64, #SortedCOO3DPermuted>
+
+    // Conversion from literal.
+    %m = arith.constant sparse<
+       [ [0,0], [1,3], [2,0], [2,3], [3,1], [4,1] ],
+         [6.0, 5.0, 4.0, 3.0, 2.0, 11.0 ]
+    > : tensor<5x4xf64>
+    %4 = sparse_tensor.convert %m : tensor<5x4xf64> to tensor<?x?xf64, #SortedCOO>
+
+    //
+    // CHECK:      ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 126, 127, 254, 1, 253, 2, 0, 1, 3, 98, 126, 127, 128, 249, 253, 255, 0, 0, 0 )
+    // CHECK-NEXT: ( -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, nan, nan, nan )
+    //
+    %p0 = sparse_tensor.pointers %0 { dimension = 0 : index }
+      : tensor<?x?xf64, #SortedCOO> to memref<?xindex>
+    %i00 = sparse_tensor.indices %0 { dimension = 0 : index }
+      : tensor<?x?xf64, #SortedCOO> to memref<?xindex>
+    %i01 = sparse_tensor.indices %0 { dimension = 1 : index }
+      : tensor<?x?xf64, #SortedCOO> to memref<?xindex>
+    %v0 = sparse_tensor.values %0
+      : tensor<?x?xf64, #SortedCOO> to memref<?xf64>
+    call @dumpi(%p0)  : (memref<?xindex>) -> ()
+    call @dumpi(%i00) : (memref<?xindex>) -> ()
+    call @dumpi(%i01) : (memref<?xindex>) -> ()
+    call @dumpf(%v0)  : (memref<?xf64>) -> ()
+
+    //
+    // CHECK-NEXT: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 0, 1, 1, 2, 3, 98, 126, 126, 127, 127, 128, 249, 253, 253, 254, 255, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 3, 1, 3, 2, 3, 3, 0, 3, 0, 3, 3, 3, 1, 3, 0, 3, 0, 0, 0 )
+    // CHECK-NEXT: ( -1, 8, -5, -9, -7, 10, -11, 2, 12, -3, -13, 14, -15, 6, 16, 4, -17, nan, nan, nan )
+    //
+    %p1 = sparse_tensor.pointers %1 { dimension = 0 : index }
+      : tensor<?x?xf64, #SortedCOOPermuted> to memref<?xindex>
+    %i10 = sparse_tensor.indices %1 { dimension = 0 : index }
+      : tensor<?x?xf64, #SortedCOOPermuted> to memref<?xindex>
+    %i11 = sparse_tensor.indices %1 { dimension = 1 : index }
+      : tensor<?x?xf64, #SortedCOOPermuted> to memref<?xindex>
+    %v1 = sparse_tensor.values %1
+      : tensor<?x?xf64, #SortedCOOPermuted> to memref<?xf64>
+    call @dumpi(%p1)  : (memref<?xindex>) -> ()
+    call @dumpi(%i10) : (memref<?xindex>) -> ()
+    call @dumpi(%i11) : (memref<?xindex>) -> ()
+    call @dumpf(%v1)  : (memref<?xf64>) -> ()
+
+    //
+    // CHECK-NEXT: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 0, 1, 1, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 0, 1, 1, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0 )
+    // CHECK-NEXT: ( 3, 63, 11, 100, 66, 61, 13, 43, 77, 10, 46, 61, 53, 3, 75, 22, 18, nan, nan, nan )
+    //
+    %p2 = sparse_tensor.pointers %2 { dimension = 0 : index }
+      : tensor<?x?x?xf64, #SortedCOO3D> to memref<?xindex>
+    %i20 = sparse_tensor.indices %2 { dimension = 0 : index }
+      : tensor<?x?x?xf64, #SortedCOO3D> to memref<?xindex>
+    %i21 = sparse_tensor.indices %2 { dimension = 1 : index }
+      : tensor<?x?x?xf64, #SortedCOO3D> to memref<?xindex>
+    %i22 = sparse_tensor.indices %2 { dimension = 2 : index }
+      : tensor<?x?x?xf64, #SortedCOO3D> to memref<?xindex>
+    %v2 = sparse_tensor.values %2
+      : tensor<?x?x?xf64, #SortedCOO3D> to memref<?xf64>
+    call @dumpi(%p2)  : (memref<?xindex>) -> ()
+    call @dumpi(%i20) : (memref<?xindex>) -> ()
+    call @dumpi(%i21) : (memref<?xindex>) -> ()
+    call @dumpi(%i21) : (memref<?xindex>) -> ()
+    call @dumpf(%v2)  : (memref<?xf64>) -> ()
+
+    //
+    // CHECK-NEXT: ( 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 )
+    // CHECK-NEXT: ( 66, 77, 61, 11, 61, 53, 22, 3, 100, 13, 10, 3, 18, 63, 43, 46, 75, nan, nan, nan )
+    //
+    %p3 = sparse_tensor.pointers %3 { dimension = 0 : index }
+      : tensor<?x?x?xf64, #SortedCOO3DPermuted> to memref<?xindex>
+    %i30 = sparse_tensor.indices %3 { dimension = 0 : index }
+      : tensor<?x?x?xf64, #SortedCOO3DPermuted> to memref<?xindex>
+    %i31 = sparse_tensor.indices %3 { dimension = 1 : index }
+      : tensor<?x?x?xf64, #SortedCOO3DPermuted> to memref<?xindex>
+    %i32 = sparse_tensor.indices %3 { dimension = 2 : index }
+      : tensor<?x?x?xf64, #SortedCOO3DPermuted> to memref<?xindex>
+    %v3 = sparse_tensor.values %3
+      : tensor<?x?x?xf64, #SortedCOO3DPermuted> to memref<?xf64>
+    call @dumpi(%p3)  : (memref<?xindex>) -> ()
+    call @dumpi(%i30) : (memref<?xindex>) -> ()
+    call @dumpi(%i31) : (memref<?xindex>) -> ()
+    call @dumpi(%i31) : (memref<?xindex>) -> ()
+    call @dumpf(%v3)  : (memref<?xf64>) -> ()
+
+    //
+    // CHECK-NEXT: ( 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 1, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 3, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: ( 6, 5, 4, 3, 2, 11, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan )
+    //
+    %p4 = sparse_tensor.pointers %4 { dimension = 0 : index }
+      : tensor<?x?xf64, #SortedCOO> to memref<?xindex>
+    %i40 = sparse_tensor.indices %4 { dimension = 0 : index }
+      : tensor<?x?xf64, #SortedCOO> to memref<?xindex>
+    %i41 = sparse_tensor.indices %4 { dimension = 1 : index }
+      : tensor<?x?xf64, #SortedCOO> to memref<?xindex>
+    %v4 = sparse_tensor.values %4
+      : tensor<?x?xf64, #SortedCOO> to memref<?xf64>
+    call @dumpi(%p4)  : (memref<?xindex>) -> ()
+    call @dumpi(%i40) : (memref<?xindex>) -> ()
+    call @dumpi(%i41) : (memref<?xindex>) -> ()
+    call @dumpf(%v4)  : (memref<?xf64>) -> ()
+
+    // And last but not least, an actual operation applied to COO.
+    // Note that this performs the operation "in place".
+    %5 = call @sparse_scale(%4) : (tensor<?x?xf64, #SortedCOO>) -> tensor<?x?xf64, #SortedCOO>
+
+    //
+    // CHECK-NEXT: ( 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 1, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 3, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: ( 12, 10, 8, 6, 4, 22, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan )
+    //
+    %p5 = sparse_tensor.pointers %5 { dimension = 0 : index }
+      : tensor<?x?xf64, #SortedCOO> to memref<?xindex>
+    %i50 = sparse_tensor.indices %5 { dimension = 0 : index }
+      : tensor<?x?xf64, #SortedCOO> to memref<?xindex>
+    %i51 = sparse_tensor.indices %5 { dimension = 1 : index }
+      : tensor<?x?xf64, #SortedCOO> to memref<?xindex>
+    %v5 = sparse_tensor.values %5
+      : tensor<?x?xf64, #SortedCOO> to memref<?xf64>
+    call @dumpi(%p5)  : (memref<?xindex>) -> ()
+    call @dumpi(%i50) : (memref<?xindex>) -> ()
+    call @dumpi(%i51) : (memref<?xindex>) -> ()
+    call @dumpf(%v5)  : (memref<?xf64>) -> ()
+
+    // Release the resources.
+    bufferization.dealloc_tensor %0 : tensor<?x?xf64, #SortedCOO>
+    bufferization.dealloc_tensor %1 : tensor<?x?xf64, #SortedCOOPermuted>
+    bufferization.dealloc_tensor %2 : tensor<?x?x?xf64, #SortedCOO3D>
+    bufferization.dealloc_tensor %3 : tensor<?x?x?xf64, #SortedCOO3DPermuted>
+    bufferization.dealloc_tensor %4 : tensor<?x?xf64, #SortedCOO>
+
+    return
+  }
+}
-- 
2.7.4
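
For context, below is a minimal sketch (not part of this patch) of one more kernel in the same style as the tests above. It assumes the same #SortedCOO encoding and linalg.generic conventions used in the patch, and it is only an illustration of how other trait-driven kernels could be expressed over the sorted COO format; the hypothetical @sparse_sum name and #trait_sum trait are not defined anywhere in the patch.

#SortedCOO = #sparse_tensor.encoding<{
  dimLevelType = [ "compressed-nu", "singleton" ]
}>

#trait_sum = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>, // A
    affine_map<(i,j) -> ()>     // x (out)
  ],
  iterator_types = ["reduction", "reduction"],
  doc = "x += A(i,j)"
}

// Sums all stored entries of a SortedCOO matrix into a 0-d tensor.
func.func @sparse_sum(%arga: tensor<32x64xf64, #SortedCOO>,
                      %argx: tensor<f64>) -> tensor<f64> {
  %0 = linalg.generic #trait_sum
    ins(%arga: tensor<32x64xf64, #SortedCOO>)
    outs(%argx: tensor<f64>) {
      ^bb(%a: f64, %x: f64):
        %1 = arith.addf %x, %a : f64
        linalg.yield %1 : f64
  } -> tensor<f64>
  return %0 : tensor<f64>
}

As with the @sparse_scale and @matvec kernels in the patch, running this through mlir-opt with -sparsification would generate a loop over the compressed-nu level that reads the values array directly; whether every such reduction is already supported end to end depends on the runtime pieces this patch intentionally leaves for later.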