From: Aart Bik Date: Fri, 2 Jun 2023 23:41:49 +0000 (-0700) Subject: [mlir][sparse] fixed bug with unary op, dense output X-Git-Tag: upstream/17.0.6~6303 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6a38c772d4f6d59b61f775bc1d8ffbd055d7c6d3;p=platform%2Fupstream%2Fllvm.git [mlir][sparse] fixed bug with unary op, dense output Note that by sparse compiler convention, dense output is zerod out when not set, so complement results in zeros where elements were present. Reviewed By: wrengr Differential Revision: https://reviews.llvm.org/D152046 --- diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 7ebb602..d9f363a 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -1049,50 +1049,52 @@ static Value genTensorLoad(CodegenEnv &env, OpBuilder &builder, ExprId exp) { /// Generates a store on a dense or sparse tensor. static void genTensorStore(CodegenEnv &env, OpBuilder &builder, ExprId exp, Value rhs) { - linalg::GenericOp op = env.op(); - Location loc = op.getLoc(); + // Only unary and binary are allowed to return uninitialized rhs + // to indicate missing output. + if (!rhs) { + assert(env.exp(exp).kind == TensorExp::Kind::kUnary || + env.exp(exp).kind == TensorExp::Kind::kBinary); + return; + } // Test if this is a scalarized reduction. if (env.isReduc()) { env.updateReduc(rhs); return; } - // Store during insertion. + // Regular store. + linalg::GenericOp op = env.op(); + Location loc = op.getLoc(); OpOperand *t = op.getDpsInitOperand(0); - if (env.isSparseOutput(t)) { - if (!rhs) { - // Only unary and binary are allowed to return uninitialized rhs - // to indicate missing output. - assert(env.exp(exp).kind == TensorExp::Kind::kUnary || - env.exp(exp).kind == TensorExp::Kind::kBinary); - } else if (env.exp(exp).kind == TensorExp::Kind::kSelect) { - // Select operation insertion. 
-      Value chain = env.getInsertionChain();
-      scf::IfOp ifOp =
-          builder.create<scf::IfOp>(loc, chain.getType(), rhs, /*else=*/true);
-      builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-      // Existing value was preserved to be used here.
-      assert(env.exp(exp).val);
-      Value v0 = env.exp(exp).val;
-      genInsertionStore(env, builder, t, v0);
-      env.merger().clearExprValue(exp);
-      // Yield modified insertion chain along true branch.
-      Value mchain = env.getInsertionChain();
-      builder.create<scf::YieldOp>(op.getLoc(), mchain);
-      // Yield original insertion chain along false branch.
-      builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-      builder.create<scf::YieldOp>(loc, chain);
-      // Done with if statement.
-      env.updateInsertionChain(ifOp->getResult(0));
-      builder.setInsertionPointAfter(ifOp);
-    } else {
-      genInsertionStore(env, builder, t, rhs);
-    }
+  if (!env.isSparseOutput(t)) {
+    SmallVector<Value> args;
+    Value ptr = genSubscript(env, builder, t, args);
+    builder.create<memref::StoreOp>(loc, rhs, ptr, args);
     return;
   }
-  // Actual store.
-  SmallVector<Value> args;
-  Value ptr = genSubscript(env, builder, t, args);
-  builder.create<memref::StoreOp>(loc, rhs, ptr, args);
+  // Store during sparse insertion.
+  if (env.exp(exp).kind != TensorExp::Kind::kSelect) {
+    genInsertionStore(env, builder, t, rhs);
+    return;
+  }
+  // Select operation insertion.
+  Value chain = env.getInsertionChain();
+  scf::IfOp ifOp =
+      builder.create<scf::IfOp>(loc, chain.getType(), rhs, /*else=*/true);
+  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+  // Existing value was preserved to be used here.
+  assert(env.exp(exp).val);
+  Value v0 = env.exp(exp).val;
+  genInsertionStore(env, builder, t, v0);
+  env.merger().clearExprValue(exp);
+  // Yield modified insertion chain along true branch.
+  Value mchain = env.getInsertionChain();
+  builder.create<scf::YieldOp>(op.getLoc(), mchain);
+  // Yield original insertion chain along false branch.
+  builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+  builder.create<scf::YieldOp>(loc, chain);
+  // Done with if statement.
+  env.updateInsertionChain(ifOp->getResult(0));
+  builder.setInsertionPointAfter(ifOp);
 }
 
 /// Generates an invariant value.
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
index 63c6d0e..462addf 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
@@ -32,14 +32,14 @@
 //
 // Traits for tensor operations.
 //
-#trait_vec_scale = {
+#trait_vec = {
   indexing_maps = [
     affine_map<(i) -> (i)>,  // a (in)
     affine_map<(i) -> (i)>   // x (out)
   ],
   iterator_types = ["parallel"]
 }
-#trait_mat_scale = {
+#trait_mat = {
   indexing_maps = [
     affine_map<(i,j) -> (i,j)>,  // A (in)
     affine_map<(i,j) -> (i,j)>   // X (out)
@@ -49,13 +49,13 @@
 
 module {
   // Invert the structure of a sparse vector. Present values become missing.
-  // Missing values are filled with 1 (i32).
-  func.func @vector_complement(%arga: tensor<?xf64, #SparseVector>) -> tensor<?xi32, #SparseVector> {
+  // Missing values are filled with 1 (i32). Output is sparse.
+  func.func @vector_complement_sparse(%arga: tensor<?xf64, #SparseVector>) -> tensor<?xi32, #SparseVector> {
     %c = arith.constant 0 : index
     %ci1 = arith.constant 1 : i32
     %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
     %xv = bufferization.alloc_tensor(%d) : tensor<?xi32, #SparseVector>
-    %0 = linalg.generic #trait_vec_scale
+    %0 = linalg.generic #trait_vec
        ins(%arga: tensor<?xf64, #SparseVector>)
       outs(%xv: tensor<?xi32, #SparseVector>) {
       ^bb(%a: f64, %x: i32):
@@ -69,13 +69,35 @@ module {
     return %0 : tensor<?xi32, #SparseVector>
   }
 
+  // Invert the structure of a sparse vector, where missing values are
+  // filled with 1. For a dense output, the sparse compiler initializes
+  // the buffer to all zero at all other places.
+  func.func @vector_complement_dense(%arga: tensor<?xf64, #SparseVector>) -> tensor<?xi32> {
+    %c = arith.constant 0 : index
+    %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
+    %xv = bufferization.alloc_tensor(%d) : tensor<?xi32>
+    %0 = linalg.generic #trait_vec
+       ins(%arga: tensor<?xf64, #SparseVector>)
+      outs(%xv: tensor<?xi32>) {
+      ^bb(%a: f64, %x: i32):
+        %1 = sparse_tensor.unary %a : f64 to i32
+          present={}
+          absent={
+            %ci1 = arith.constant 1 : i32
+            sparse_tensor.yield %ci1 : i32
+          }
+        linalg.yield %1 : i32
+    } -> tensor<?xi32>
+    return %0 : tensor<?xi32>
+  }
+
   // Negate existing values. Fill missing ones with +1.
   func.func @vector_negation(%arga: tensor<?xf64, #SparseVector>) -> tensor<?xf64, #SparseVector> {
     %c = arith.constant 0 : index
     %cf1 = arith.constant 1.0 : f64
     %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
     %xv = bufferization.alloc_tensor(%d) : tensor<?xf64, #SparseVector>
-    %0 = linalg.generic #trait_vec_scale
+    %0 = linalg.generic #trait_vec
        ins(%arga: tensor<?xf64, #SparseVector>)
       outs(%xv: tensor<?xf64, #SparseVector>) {
       ^bb(%a: f64, %x: f64):
@@ -98,7 +120,7 @@ module {
     %c = arith.constant 0 : index
     %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
     %xv = bufferization.alloc_tensor(%d) : tensor<?xf64, #SparseVector>
-    %0 = linalg.generic #trait_vec_scale
+    %0 = linalg.generic #trait_vec
        ins(%arga: tensor<?xf64, #SparseVector>)
       outs(%xv: tensor<?xf64, #SparseVector>) {
       ^bb(%a: f64, %x: f64):
@@ -126,7 +148,7 @@ module {
     %d0 = tensor.dim %argx, %c0 : tensor<?x?xf64, #SparseMatrix>
     %d1 = tensor.dim %argx, %c1 : tensor<?x?xf64, #SparseMatrix>
     %xv = bufferization.alloc_tensor(%d0, %d1) : tensor<?x?xf64, #SparseMatrix>
-    %0 = linalg.generic #trait_mat_scale
+    %0 = linalg.generic #trait_mat
        ins(%argx: tensor<?x?xf64, #SparseMatrix>)
       outs(%xv: tensor<?x?xf64, #SparseMatrix>) {
       ^bb(%a: f64, %x: f64):
@@ -153,7 +175,7 @@ module {
     %d0 = tensor.dim %argx, %c0 : tensor<?x?xf64, #SparseMatrix>
     %d1 = tensor.dim %argx, %c1 : tensor<?x?xf64, #SparseMatrix>
     %xv = bufferization.alloc_tensor(%d0, %d1) : tensor<?x?xf64, #SparseMatrix>
-    %0 = linalg.generic #trait_mat_scale
+    %0 = linalg.generic #trait_mat
        ins(%argx: tensor<?x?xf64, #SparseMatrix>)
       outs(%xv: tensor<?x?xf64, #SparseMatrix>) {
       ^bb(%a: f64, %x: f64):
@@ -223,6 +245,7 @@ module {
 
   // Driver method to call and verify vector kernels.
   func.func @entry() {
+    %cmu = arith.constant -99 : i32
     %c0 = arith.constant 0 : index
 
     // Setup sparse vectors.
@@ -240,7 +263,7 @@ module {
     %sm1 = sparse_tensor.convert %m1 : tensor<4x8xf64> to tensor<?x?xf64, #SparseMatrix>
 
     // Call sparse vector kernels.
-    %0 = call @vector_complement(%sv1)
+    %0 = call @vector_complement_sparse(%sv1)
       : (tensor<?xf64, #SparseVector>) -> tensor<?xi32, #SparseVector>
     %1 = call @vector_negation(%sv1)
       : (tensor<?xf64, #SparseVector>) -> tensor<?xf64, #SparseVector>
@@ -253,6 +276,9 @@ module {
     %4 = call @matrix_slice(%sm1)
       : (tensor<?x?xf64, #SparseMatrix>) -> tensor<?x?xf64, #SparseMatrix>
 
+    // Call kernel with dense output.
+    %5 = call @vector_complement_dense(%sv1) : (tensor<?xf64, #SparseVector>) -> tensor<?xi32>
+
     //
     // Verify the results.
     //
@@ -268,6 +294,7 @@ module {
     // CHECK-NEXT: ( ( 3, 3, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 3 ), ( 0, 0, 4, 0, 5, 0, 0, 6 ), ( 7, 0, 7, 7, 0, 0, 0, 0 ) )
     // CHECK-NEXT: ( 99, 99, 99, 99, 5, 6, 99, 99, 99, 0, 0, 0, 0, 0, 0, 0 )
     // CHECK-NEXT: ( ( 99, 99, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 99 ), ( 0, 0, 99, 0, 5, 0, 0, 6 ), ( 99, 0, 99, 99, 0, 0, 0, 0 ) )
+    // CHECK-NEXT: ( 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0 )
     //
     call @dump_vec_f64(%sv1) : (tensor<?xf64, #SparseVector>) -> ()
     call @dump_vec_i32(%0) : (tensor<?xi32, #SparseVector>) -> ()
@@ -275,6 +302,8 @@ module {
     call @dump_vec_f64(%2) : (tensor<?xf64, #SparseVector>) -> ()
     call @dump_mat(%3) : (tensor<?x?xf64, #SparseMatrix>) -> ()
     call @dump_mat(%4) : (tensor<?x?xf64, #SparseMatrix>) -> ()
+    %v = vector.transfer_read %5[%c0], %cmu: tensor<?xi32>, vector<32xi32>
+    vector.print %v : vector<32xi32>
 
     // Release the resources.
     bufferization.dealloc_tensor %sv1 : tensor<?xf64, #SparseVector>
@@ -284,6 +313,7 @@ module {
     bufferization.dealloc_tensor %2 : tensor<?xf64, #SparseVector>
     bufferization.dealloc_tensor %3 : tensor<?x?xf64, #SparseMatrix>
     bufferization.dealloc_tensor %4 : tensor<?x?xf64, #SparseMatrix>
+    bufferization.dealloc_tensor %5 : tensor<?xi32>
     return
   }
 }