From 3bd82f30dcc25533de1ff900a704efa77a6951da Mon Sep 17 00:00:00 2001
From: Aart Bik
Date: Fri, 3 Feb 2023 17:26:04 -0800
Subject: [PATCH] [mlir][sparse] compute allocation size_hint

This adds the hint to a number of tensor allocations in codegen,
shaving off quite some time from e.g. reading in sparse matrices,
thanks to the resulting zero-reallocation scheme. Note that we can
probably provide hints on all allocations, and refine the heuristics
that use them for general tensors.
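
To illustrate the effect (a sketch only; the %-value names, the #COO
alias, and the f64 element type are placeholders, not taken verbatim
from the code or tests), an allocation now carries the hint roughly as:

  // %nnz is the number of stored entries, obtained from the file header
  // on the reader path, or via sparse_tensor.number_of_entries on the
  // convert path.
  %coo = bufferization.alloc_tensor(%d0, %d1) size_hint=%nnz
       : tensor<?x?xf64, #COO>

Without the hint, codegen falls back to a small constant heuristic (16)
for the pointer/index buffers and has to grow them while inserting.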

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D143309
---
 .../Transforms/SparseTensorCodegen.cpp      |  4 +++
 .../Transforms/SparseTensorRewriting.cpp    | 42 +++++++++++++---------
 .../SparseTensor/rewriting_for_codegen.mlir |  6 ++--
 3 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
index 9f3388d..b17305e 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -229,6 +229,10 @@ static void createAllocFields(OpBuilder &builder, Location loc, Type type,
     ptrHeuristic = constantIndex(builder, loc, 2);
     idxHeuristic = builder.create<arith::MulIOp>(
         loc, constantIndex(builder, loc, rank), sizeHint); // AOS
+  } else if (rank == 2 && isDenseDim(rtp, 0) && isCompressedDim(rtp, 1)) {
+    ptrHeuristic = builder.create<arith::AddIOp>(
+        loc, sizeHint, constantIndex(builder, loc, 1));
+    idxHeuristic = sizeHint;
   } else {
     ptrHeuristic = idxHeuristic = constantIndex(builder, loc, 16);
   }
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 27fef5c..76745a5 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -410,9 +410,14 @@ public:
     //   foreach srcCoords %srcTensor
     //     insert translateIndicesArray(srcCoords), %tmp
     //   %t = sparse_tensor.cast %tmp
+    Value nnz = rewriter.create<NumberOfEntriesOp>(loc, srcTensor);
     RankedTensorType cooTp = getUnorderedCOOFromType(dstTp);
-    auto cooBuffer =
-        rewriter.create<AllocTensorOp>(loc, cooTp, dstDynSizes).getResult();
+    Value cooBuffer =
+        rewriter
+            .create<AllocTensorOp>(loc, cooTp, dstDynSizes, Value(),
+                                   /*sizeHint=*/nnz, Attribute())
+            .getResult();
+
     ForeachOp foreachOp = rewriter.create<ForeachOp>(
         loc, srcTensor, cooBuffer,
         [&](OpBuilder &builder, Location loc, ValueRange args, Value v,
@@ -787,6 +792,7 @@ private:
     SmallVector<Value> srcSizes;
     sizesForTensor(rewriter, srcSizes, loc, srcTp, src);
     Value tmpCoo = Value();
+    Value nnz = rewriter.create<NumberOfEntriesOp>(loc, src);
     // We need a tmp COO buffer if and only if
    // 1. the src tensor is not a COO and
     // 2. the src tensor is not ordered in the same way as the target
@@ -802,8 +808,10 @@ private:
       getDynamicSizes(srcTp, srcSizes, dynSrcSizes);
       srcTp =
          getUnorderedCOOFromTypeWithOrdering(srcTp, encDst.getDimOrdering());
-      tmpCoo =
-          rewriter.create<AllocTensorOp>(loc, srcTp, dynSrcSizes).getResult();
+      tmpCoo = rewriter
+                   .create<AllocTensorOp>(loc, srcTp, dynSrcSizes, Value(),
+                                          /*sizeHint=*/nnz, Attribute())
+                   .getResult();
       auto foreachOp = rewriter.create<ForeachOp>(
           loc, src, tmpCoo,
           [&](OpBuilder &builder, Location loc, ValueRange args, Value v,
@@ -823,11 +831,6 @@ private:
     // Only need to sort if the srcTp is not already sorted (we faithfully take
     // the guarantee from the sparse tensor encoding).
     if (!isAllDimOrdered(srcTp)) {
-      // Retrieve NNZ.
-      Value nnz = rewriter.create<NumberOfEntriesOp>(loc, src);
-      nnz = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(),
-                                                nnz);
-
       // Retrieve the values-array.
       Value y = genToValues(rewriter, loc, src);
       SparseTensorEncodingAttr encSrc = getSparseTensorEncoding(srcTp);
@@ -858,8 +861,10 @@ private:
     // For each element in the COO tensor, insert the element to the dst tensor.
     SmallVector<Value> dynDstSizes;
     getDynamicSizes(dstTp, srcSizes, dynDstSizes);
-    Value dst =
-        rewriter.create<AllocTensorOp>(loc, dstTp, dynDstSizes).getResult();
+    Value dst = rewriter
+                    .create<AllocTensorOp>(loc, dstTp, dynDstSizes, Value(),
+                                           /*sizeHint=*/nnz, Attribute())
+                    .getResult();
     SmallVector<Value> indices(srcTp.getRank(), Value());
     auto foreachOp = rewriter.create<ForeachOp>(
         loc, src, dst,
@@ -1027,18 +1032,21 @@ struct NewRewriter : public OpRewritePattern<NewOp> {
    //       get the next element from the input file
    //       insert the element to %tmp
    //   %t = sparse_tensor.ConvertOp %tmp
-    RankedTensorType cooTp =
-        getUnorderedCOOFromTypeWithOrdering(dstTp, encDst.getDimOrdering());
-    Value cooBuffer =
-        rewriter.create<AllocTensorOp>(loc, cooTp, dynSizesArray).getResult();
-
     Value c0 = constantIndex(rewriter, loc, 0);
     Value c1 = constantIndex(rewriter, loc, 1);
     Value nnz = createFuncCall(rewriter, loc, "getSparseTensorReaderNNZ",
                                {indexTp}, {reader}, EmitCInterface::Off)
                     .getResult(0);
-    Value symmetric;
+    RankedTensorType cooTp =
+        getUnorderedCOOFromTypeWithOrdering(dstTp, encDst.getDimOrdering());
+    Value cooBuffer =
+        rewriter
+            .create<AllocTensorOp>(loc, cooTp, dynSizesArray, Value(),
+                                   /*sizeHint=*/nnz, Attribute())
+            .getResult();
+    // The verifier ensures only 2D tensors can have the expandSymmetry flag.
+    Value symmetric;
     if (rank == 2 && op.getExpandSymmetry()) {
       symmetric =
           createFuncCall(rewriter, loc,
                          "getSparseTensorReaderIsSymmetric",
diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
index 00811d4..df95848 100644
--- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
+++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
@@ -20,8 +20,8 @@
 // CHECK: call @copySparseTensorReaderDimSizes(%[[R]], %[[DS]])
 // CHECK: %[[D0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]]
 // CHECK: %[[D1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]]
-// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]])
 // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]])
+// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint=%[[N]]
 // CHECK: %[[S:.*]] = call @getSparseTensorReaderIsSymmetric(%[[R]])
 // CHECK: %[[VB:.*]] = memref.alloca()
 // CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]])
@@ -59,8 +59,8 @@ func.func @sparse_new_symmetry(%arg0: !llvm.ptr) -> tensor {
 // CHECK: call @copySparseTensorReaderDimSizes(%[[R]], %[[DS]])
 // CHECK: %[[D0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]]
 // CHECK: %[[D1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]]
-// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]])
 // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]])
+// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint=%[[N]]
 // CHECK: %[[VB:.*]] = memref.alloca()
 // CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]])
 // CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]])
@@ -90,8 +90,8 @@ func.func @sparse_new(%arg0: !llvm.ptr) -> tensor {
 // CHECK: call @copySparseTensorReaderDimSizes(%[[R]], %[[DS]])
 // CHECK: %[[D0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]]
 // CHECK: %[[D1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]]
-// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]])
 // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]])
+// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint=%[[N]]
 // CHECK: %[[VB:.*]] = memref.alloca()
 // CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]])
 // CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]])
--
2.7.4