From a2c4ca50caf43a3924a37580451ebe9fa3daa128 Mon Sep 17 00:00:00 2001
From: Stella Stamenova
Date: Mon, 7 Nov 2022 08:48:52 -0800
Subject: [PATCH] Revert "[mlir][sparse] support Parallel for/reduction."

This reverts commit 838389780e56f1a198a94f66ea436359466bf5ed.

This broke the windows mlir buildbot:
https://lab.llvm.org/buildbot/#/builders/13/builds/27934
---
 .../SparseTensor/Transforms/CodegenUtils.cpp       | 146 +++++----------------
 .../Dialect/SparseTensor/Transforms/CodegenUtils.h |  32 +----
 .../SparseTensor/Transforms/Sparsification.cpp     | 133 +++++++++++--------
 .../test/Dialect/SparseTensor/sparse_parallel.mlir |  20 +--
 .../SparseTensor/sparse_parallel_reduce.mlir       |  63 ---------
 .../Dialect/SparseTensor/CPU/sparse_matmul.mlir    |   8 --
 .../Dialect/SparseTensor/CPU/sparse_matvec.mlir    |  10 --
 7 files changed, 127 insertions(+), 285 deletions(-)
 delete mode 100644 mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index 27b7acb..032d802 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -219,12 +219,9 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
     OpBuilder &builder, Location loc, size_t tid, size_t dim,
     MutableArrayRef reduc, bool isParallel, ArrayRef extraTids,
     ArrayRef extraDims) {
-  assert(dimTypes[tid].size() > dim);
   // We can not re-enter the same level.
   assert(!coord[tid][dim]);
-  // TODO: support multiple return on parallel for?
-  assert(!isParallel || reduc.empty() <= 1);

   Value step = constantIndex(builder, loc, 1);
   auto dimType = dimTypes[tid][dim];
@@ -235,38 +232,11 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
   Value lo = isSparseInput ? pidxs[tid][dim]      // current offset
                            : loopSeqStack.back(); // univeral tid
   Value hi = highs[tid][dim];
-  Operation *loop = nullptr;
-  Value iv;
-  if (isParallel) {
-    scf::ParallelOp parOp =
-        builder.create(loc, lo, hi, step, reduc);
-    builder.setInsertionPointToStart(parOp.getBody());
-    assert(parOp.getNumReductions() == reduc.size());
-    iv = parOp.getInductionVars()[0];
-
-    // In-place update on the reduction variable vector.
-    // Note that the init vals is not the actual reduction variables but instead
-    // used as a `special handle` to (temporarily) represent them. The
-    // expression on init vals will be moved into scf.reduce and replaced with
-    // the block arguments when exiting the loop (see exitForLoop). This is
-    // needed as we can not build the actual reduction block and get the actual
-    // reduction varaible before users fill parallel loop body.
-    for (int i = 0, e = reduc.size(); i < e; i++)
-      reduc[i] = parOp.getInitVals()[i];
-    loop = parOp;
-  } else {
-    scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc);
-    builder.setInsertionPointToStart(forOp.getBody());
-    iv = forOp.getInductionVar();
-
-    // In-place update on the reduction variable vector.
-    assert(forOp.getNumRegionIterArgs() == reduc.size());
-    for (int i = 0, e = reduc.size(); i < e; i++)
-      reduc[i] = forOp.getRegionIterArg(i);
-    loop = forOp;
-  }
-  assert(loop && iv);
+  scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc);
+  builder.setInsertionPointToStart(forOp.getBody());
+  Value iv = forOp.getInductionVar();
+  assert(iv);

   if (isSparseInput) {
     pidxs[tid][dim] = iv;
     // Generating a load on the indices array yields the coordinate.
@@ -283,12 +253,16 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(

   // NOTE: we can also prepares for next dim here in advance
   // Push the loop into stack
-  loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), loop,
+  loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), forOp,
                          coord[tid][dim]);
   // Emit extra locals.
   emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims);

-  return loop;
+  // In-place update on the reduction variable vector.
+  assert(forOp.getNumRegionIterArgs() == reduc.size());
+  for (int i = 0, e = reduc.size(); i < e; i++)
+    reduc[i] = forOp.getRegionIterArg(i);
+  return forOp;
 }

 Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims(
@@ -460,73 +434,17 @@ void SparseTensorLoopEmitter::emitExtraLocalsForTensorsAtDenseDims(
   }
 }

-void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc,
-                                          MutableArrayRef reduc) {
+SmallVector
+SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc,
+                                     ArrayRef reduc) {
   LoopLevelInfo &loopInfo = loopStack.back();
   auto &dims = loopStack.back().dims;
   auto &tids = loopStack.back().tids;
-  auto forOp = llvm::dyn_cast(loopInfo.loop);
-  if (forOp) {
-    if (!reduc.empty()) {
-      assert(reduc.size() == forOp.getNumResults());
-      rewriter.setInsertionPointToEnd(forOp.getBody());
-      rewriter.create(loc, reduc);
-    }
-    // Exit the loop.
-    rewriter.setInsertionPointAfter(forOp);
-    // In-place update reduction variables.
-    for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++)
-      reduc[i] = forOp.getResult(i);
-  } else {
-    auto parOp = llvm::cast(loopInfo.loop);
-    if (!reduc.empty()) {
-      assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1);
-      Operation *redExp = reduc.front().getDefiningOp();
-      // Reduction expression should have no use.
-      assert(redExp->getUses().empty());
-      // This must be a binary operation.
-      // NOTE: This is users' responsibilty to ensure the operation are
-      // commutative.
-      assert(redExp->getNumOperands() == 2 && redExp->getNumResults() == 1);
-
-      Value redVal = parOp.getInitVals().front();
-      Value curVal;
-      if (redExp->getOperand(0) == redVal)
-        curVal = redExp->getOperand(1);
-      else if (redExp->getOperand(1) == redVal)
-        curVal = redExp->getOperand(0);
-      // One of the operands must be the init value (which is also the
-      // previous reduction value).
-      assert(curVal);
-      // The reduction expression should be the only user of the reduction val
-      // inside the parallel for.
-      unsigned numUsers = 0;
-      for (Operation *op : redVal.getUsers()) {
-        if (op->getParentOp() == parOp)
-          numUsers++;
-      }
-      assert(numUsers == 1);
-      (void)numUsers; // to silence unused variable warning in release build
-
-      rewriter.setInsertionPointAfter(redExp);
-      auto redOp = rewriter.create(loc, curVal);
-      // Attach to the reduction op.
-      Block *redBlock = &redOp.getRegion().getBlocks().front();
-      rewriter.setInsertionPointToEnd(redBlock);
-      Operation *newRed = rewriter.clone(*redExp);
-      // Replaces arguments of the reduction expression by using the block
-      // arguments from scf.reduce.
-      rewriter.updateRootInPlace(
-          newRed, [&]() { newRed->setOperands(redBlock->getArguments()); });
-      // Erases the out-dated reduction expression.
-      rewriter.eraseOp(redExp);
-      rewriter.setInsertionPointToEnd(redBlock);
-      rewriter.create(loc, newRed->getResult(0));
-    }
-    rewriter.setInsertionPointAfter(parOp);
-    // In-place update reduction variables.
-    for (unsigned i = 0, e = parOp.getResults().size(); i < e; i++)
-      reduc[i] = parOp.getResult(i);
+  auto forOp = llvm::cast(loopInfo.loop);
+  if (!reduc.empty()) {
+    assert(reduc.size() == forOp.getNumResults());
+    builder.setInsertionPointToEnd(forOp.getBody());
+    builder.create(loc, reduc);
   }

   // Finished iterating a tensor, clean up
@@ -540,10 +458,14 @@ void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc,
     if (!isDenseDLT(dimTypes[tid][dim]))
       highs[tid][dim] = Value();
   }
+  // exit the loop
+  builder.setInsertionPointAfter(forOp);
+  return forOp.getResults();
 }

-void SparseTensorLoopEmitter::exitCoIterationLoop(
-    OpBuilder &builder, Location loc, MutableArrayRef reduc) {
+SmallVector
+SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc,
+                                             ArrayRef reduc) {
   auto whileOp = llvm::cast(loopStack.back().loop);
   auto &dims = loopStack.back().dims;
   auto &tids = loopStack.back().tids;
@@ -577,10 +499,10 @@ void SparseTensorLoopEmitter::exitCoIterationLoop(
   }

   // Reduction value from users.
-  for (unsigned i = 0, e = reduc.size(); i < e; i++) {
-    operands.push_back(reduc[i]);
-    // In place update reduction variable.
-    reduc[i] = whileOp->getResult(o++);
+  SmallVector ret;
+  for (auto red : reduc) {
+    operands.push_back(red);
+    ret.push_back(whileOp->getResult(o++));
   }

   // An (optional) universal index.
@@ -595,24 +517,26 @@ void SparseTensorLoopEmitter::exitCoIterationLoop(
   assert(o == operands.size());
   builder.create(loc, operands);
   builder.setInsertionPointAfter(whileOp);
+  return ret;
 }

-void SparseTensorLoopEmitter::exitCurrentLoop(RewriterBase &rewriter,
-                                              Location loc,
-                                              MutableArrayRef reduc) {
+SmallVector
+SparseTensorLoopEmitter::exitCurrentLoop(OpBuilder &builder, Location loc,
+                                         ArrayRef reduc) {
   // Clean up the values, it would help use to discover potential bug at a
   // earlier stage (instead of silently using a wrong value).
   LoopLevelInfo &loopInfo = loopStack.back();
   assert(loopInfo.tids.size() == loopInfo.dims.size());
   SmallVector red;
   if (llvm::isa(loopInfo.loop)) {
-    exitCoIterationLoop(rewriter, loc, reduc);
+    red = exitCoiterationLoop(builder, loc, reduc);
   } else {
-    exitForLoop(rewriter, loc, reduc);
+    red = exitForLoop(builder, loc, reduc);
   }

   assert(loopStack.size() == loopSeqStack.size());
   loopStack.pop_back();
+  return red;
 }

 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index a75d392..3228eb4 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -380,8 +380,8 @@ public:
                              ArrayRef dims, bool needsUniv,
                              MutableArrayRef reduc = {},
                              ArrayRef extraTids = {}, ArrayRef extraDims = {});
-  void exitCurrentLoop(RewriterBase &rewriter, Location loc,
-                       MutableArrayRef reduc = {});
+  SmallVector exitCurrentLoop(OpBuilder &builder, Location loc,
+                              ArrayRef reduc = {});

   /// Returns the array of coordinate for all the loop generated till now.
   void getCoordinateArray(SmallVectorImpl &coords) const {
@@ -452,35 +452,17 @@ private:
                                            ArrayRef dims);

   /// Exits a for loop, returns the reduction results, e.g.,
-  /// For sequential for loops:
   /// %ret = for () {
   ///   ...
-  ///   %val = addi %args, %c
   ///   yield %val
   /// }
-  /// For parallel loops, the following generated code by users:
-  /// %ret = parallel () init(%args) {
-  ///   ...
-  ///   %val = op %args, %c
-  /// }
-  /// will be transformed into
-  /// %ret = parallel () init(%args) {
-  ///   ...
-  ///   scf.reduce(%c) bb0(%0, %1){
-  ///     %val = op %0, %1
-  ///     scf.reduce.return %val
-  ///   }
-  /// }
-  /// NOTE: only one instruction will be moved into reduce block, transformation
-  /// will fail if multiple instructions are used to compute the reduction
-  /// value.
-  /// Return %ret to user, while %val is provided by users (`reduc`).
-  void exitForLoop(RewriterBase &rewriter, Location loc,
-                   MutableArrayRef reduc);
+  /// Return %ret to user, while %val is provided by users (`reduc`)
+  SmallVector exitForLoop(OpBuilder &builder, Location loc,
+                          ArrayRef reduc);

   /// Exits a while loop, returns the reduction results.
-  void exitCoIterationLoop(OpBuilder &builder, Location loc,
-                           MutableArrayRef reduc);
+  SmallVector exitCoiterationLoop(OpBuilder &builder, Location loc,
+                                  ArrayRef reduc);

   // Whether the loop emitter needs to treat the last tensor as the output
   // tensor.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index 533d31f..9f01731 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -410,34 +410,6 @@ static Value getCustomRedId(Operation *op) {
 // Sparse compiler synthesis methods (statements and expressions).
 //===----------------------------------------------------------------------===//

-/// Generates loop boundary statements (entering/exiting loops). The function
-/// passes and updates the reduction value.
-static Optional genLoopBoundary(
-    CodeGen &codegen, Merger &merger,
-    function_ref(MutableArrayRef reduc)>
-        callback) {
-  SmallVector reduc;
-  if (codegen.redVal)
-    reduc.push_back(codegen.redVal);
-  if (codegen.expValues)
-    reduc.push_back(codegen.expCount);
-  if (codegen.insChain)
-    reduc.push_back(codegen.insChain);
-
-  auto r = callback(reduc);
-
-  // Callback should do in-place update on reduction value vector.
-  unsigned i = 0;
-  if (codegen.redVal)
-    updateReduc(merger, codegen, reduc[i++]);
-  if (codegen.expValues)
-    codegen.expCount = reduc[i++];
-  if (codegen.insChain)
-    codegen.insChain = reduc[i];
-
-  return r;
-}
-
 /// Local bufferization of all dense and sparse data structures.
 static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                        linalg::GenericOp op) {
@@ -897,25 +869,23 @@ static void genExpansion(Merger &merger, CodeGen &codegen, OpBuilder &builder,
 /// Returns parallelization strategy. Any implicit loop in the Linalg
 /// operation that is marked "parallel" is a candidate. Whether it is actually
 /// converted to a parallel operation depends on the requested strategy.
-static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isSparse) {
+static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isReduction,
+                          bool isSparse) {
   // Reject parallelization of sparse output.
   if (codegen.sparseOut)
     return false;
-  // Parallel loops on tensor expansion can cause data races.
-  if (codegen.expCount)
-    return false;
   // Inspect strategy.
   switch (codegen.options.parallelizationStrategy) {
   case SparseParallelizationStrategy::kNone:
     return false;
   case SparseParallelizationStrategy::kDenseOuterLoop:
-    return isOuter && !isSparse;
+    return isOuter && !isSparse && !isReduction;
   case SparseParallelizationStrategy::kAnyStorageOuterLoop:
-    return isOuter;
+    return isOuter && !isReduction;
   case SparseParallelizationStrategy::kDenseAnyLoop:
-    return !isSparse;
+    return !isSparse && !isReduction;
   case SparseParallelizationStrategy::kAnyStorageAnyLoop:
-    return true;
+    return !isReduction;
   }
   llvm_unreachable("unexpected parallelization strategy");
 }
@@ -928,16 +898,33 @@ static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                          ArrayRef extraDims) {
   Location loc = op.getLoc();
   auto iteratorTypes = op.getIteratorTypesArray();
+  bool isReduction = linalg::isReductionIterator(iteratorTypes[idx]);
   bool isSparse = isCompressedDLT(merger.getDimLevelType(tid, idx)) ||
                   isSingletonDLT(merger.getDimLevelType(tid, idx));
-  bool isParallel = isParallelFor(codegen, isOuter, isSparse);
-
-  Operation *loop =
-      genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) {
-        return codegen.loopEmitter.enterLoopOverTensorAtDim(
-            builder, loc, tid, dim, reduc, isParallel, extraTids, extraDims);
-      }).value();
-  assert(loop);
+  bool isParallel = isParallelFor(codegen, isOuter, isReduction, isSparse);
+  assert(!isParallel);
+
+  // Emit a sequential for loop.
+  SmallVector operands;
+  if (codegen.redVal)
+    operands.push_back(codegen.redVal);
+  if (codegen.expValues)
+    operands.push_back(codegen.expCount);
+  if (codegen.insChain)
+    operands.push_back(codegen.insChain);
+
+  Operation *loop = codegen.loopEmitter.enterLoopOverTensorAtDim(
+      builder, loc, tid, dim, operands, isParallel, extraTids, extraDims);
+
+  unsigned o = 0;
+  if (codegen.redVal)
+    updateReduc(merger, codegen, operands[o++]);
+  if (codegen.expValues)
+    codegen.expCount = operands[o++];
+  if (codegen.insChain)
+    codegen.insChain = operands[o++];
+  assert(o == operands.size());
+
   return loop;
 }

@@ -947,15 +934,29 @@ static Operation *genWhile(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                            ArrayRef condTids, ArrayRef condDims,
                            ArrayRef extraTids, ArrayRef extraDims) {
+  SmallVector operands;
+
+  // Construct the while-loop with a parameter for each index.
+  if (codegen.redVal)
+    operands.push_back(codegen.redVal);
+  if (codegen.expValues)
+    operands.push_back(codegen.expCount);
+  if (codegen.insChain)
+    operands.push_back(codegen.insChain);
+
+  Operation *loop = codegen.loopEmitter.enterCoIterationOverTensorsAtDims(
+      builder, op.getLoc(), condTids, condDims, needsUniv, operands, extraTids,
+      extraDims);
+
+  unsigned o = 0;
+  if (codegen.redVal)
+    updateReduc(merger, codegen, operands[o++]);
+  if (codegen.expValues)
+    codegen.expCount = operands[o++];
+  if (codegen.insChain)
+    codegen.insChain = operands[o++];
+  assert(o == operands.size());
-  Operation *loop =
-      genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) {
-        // Construct the while-loop with a parameter for each index.
-        return codegen.loopEmitter.enterCoIterationOverTensorsAtDims(
-            builder, op.getLoc(), condTids, condDims, needsUniv, reduc,
-            extraTids, extraDims);
-      }).value();
-  assert(loop);

   return loop;
 }

@@ -1185,21 +1186,37 @@ static Operation *startLoop(Merger &merger, CodeGen &codegen,
 }

 /// Ends a single loop in current sequence. Returns new values for needsUniv.
-static bool endLoop(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
+static bool endLoop(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                     linalg::GenericOp op, Operation *loop, unsigned idx,
                     unsigned li, bool needsUniv) {
   // End a while-loop.
   if (auto whileOp = dyn_cast(loop)) {
-    finalizeWhileOp(merger, codegen, rewriter, op, idx, needsUniv,
+    finalizeWhileOp(merger, codegen, builder, op, idx, needsUniv,
                     merger.lat(li).bits, whileOp);
   } else {
     needsUniv = false;
   }

-  genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) {
-    codegen.loopEmitter.exitCurrentLoop(rewriter, op.getLoc(), reduc);
-    return llvm::None;
-  });
+  SmallVector reduc;
+  if (codegen.redVal)
+    reduc.push_back(codegen.redVal);
+  if (codegen.expValues)
+    reduc.push_back(codegen.expCount);
+  if (codegen.insChain)
+    reduc.push_back(codegen.insChain);
+
+  auto loopRet =
+      codegen.loopEmitter.exitCurrentLoop(builder, op.getLoc(), reduc);
+  assert(reduc.size() == loopRet.size());
+
+  unsigned o = 0;
+  if (codegen.redVal)
+    updateReduc(merger, codegen, loopRet[o++]);
+  if (codegen.expValues)
+    codegen.expCount = loopRet[o++];
+  if (codegen.insChain)
+    codegen.insChain = loopRet[o++];
+  assert(o == loopRet.size());

   return needsUniv;
 }
diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir
index f38865c..38766b0 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir
@@ -1,13 +1,14 @@
 // RUN: mlir-opt %s -sparsification="parallelization-strategy=none" | \
 // RUN: FileCheck %s --check-prefix=CHECK-PAR0
-// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \
-// RUN: FileCheck %s --check-prefix=CHECK-PAR1
-// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \
-// RUN: FileCheck %s --check-prefix=CHECK-PAR2
-// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \
-// RUN: FileCheck %s --check-prefix=CHECK-PAR3
-// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \
-// RUN: FileCheck %s --check-prefix=CHECK-PAR4
+// FIXME: we do not support vectorization/parallel loops in loop emitter right now
+// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \
+// R_U_N: FileCheck %s --check-prefix=CHECK-PAR1
+// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \
+// R_U_N: FileCheck %s --check-prefix=CHECK-PAR2
+// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \
+// R_U_N: FileCheck %s --check-prefix=CHECK-PAR3
+// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \
+// R_U_N: FileCheck %s --check-prefix=CHECK-PAR4

 #DenseMatrix = #sparse_tensor.encoding<{
   dimLevelType = [ "dense", "dense" ]
@@ -150,8 +151,7 @@ func.func @scale_ss(%scale: f32,
 //
 // CHECK-PAR4-LABEL: func @matvec
 // CHECK-PAR4: scf.parallel
-// CHECK-PAR4: scf.parallel
-// CHECK-PAR4: scf.reduce
+// CHECK-PAR4: scf.for
 // CHECK-PAR4: return
 //
 func.func @matvec(%arga: tensor<16x32xf32, #CSR>,
diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir
deleted file mode 100644
index 8ba66d2..0000000
--- a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir
+++ /dev/null
@@ -1,63 +0,0 @@
-// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \
-// RUN: FileCheck %s
-
-#CSR = #sparse_tensor.encoding<{
-  dimLevelType = [ "dense", "compressed" ]
-}>
-
-#trait_matvec = {
-  indexing_maps = [
-    affine_map<(i,j) -> (i,j)>,  // A
-    affine_map<(i,j) -> (j)>,    // b
-    affine_map<(i,j) -> (i)>     // x (out)
-  ],
-  iterator_types = ["parallel", "reduction"],
-  doc = "x(i) += A(i,j) * b(j)"
-}
-// CHECK-LABEL:  func.func @matvec(
-// CHECK-SAME:     %[[TMP_arg0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>,
-// CHECK-SAME:     %[[TMP_arg1:.*]]: tensor<32xf32>,
-// CHECK-SAME:     %[[TMP_arg2:.*]]: tensor<16xf32>) -> tensor<16xf32> {
-// CHECK-DAG:    %[[TMP_c16:.*]] = arith.constant 16 : index
-// CHECK-DAG:    %[[TMP_c0:.*]] = arith.constant 0 : index
-// CHECK-DAG:    %[[TMP_c1:.*]] = arith.constant 1 : index
-// CHECK:        %[[TMP_0:.*]] = sparse_tensor.pointers %[[TMP_arg0]] {dimension = 1 : index}
-// CHECK:        %[[TMP_1:.*]] = sparse_tensor.indices %[[TMP_arg0]] {dimension = 1 : index}
-// CHECK:        %[[TMP_2:.*]] = sparse_tensor.values %[[TMP_arg0]]
-// CHECK:        %[[TMP_3:.*]] = bufferization.to_memref %[[TMP_arg1]] : memref<32xf32>
-// CHECK:        %[[TMP_4:.*]] = bufferization.to_memref %[[TMP_arg2]] : memref<16xf32>
-// CHECK:        scf.parallel (%[[TMP_arg3:.*]]) = (%[[TMP_c0]]) to (%[[TMP_c16]]) step (%[[TMP_c1]]) {
-// CHECK:          %[[TMP_6:.*]] = memref.load %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32>
-// CHECK:          %[[TMP_7:.*]] = memref.load %[[TMP_0]][%[[TMP_arg3]]] : memref
-// CHECK:          %[[TMP_8:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
-// CHECK:          %[[TMP_9:.*]] = memref.load %[[TMP_0]][%[[TMP_8]]] : memref
-// CHECK:          %[[TMP_10:.*]] = scf.parallel (%[[TMP_arg4:.*]]) = (%[[TMP_7]]) to (%[[TMP_9]]) step (%[[TMP_c1]]) init (%[[TMP_6]]) -> f32 {
-// CHECK:            %[[TMP_11:.*]] = memref.load %[[TMP_1]][%[[TMP_arg4]]] : memref
-// CHECK:            %[[TMP_12:.*]] = memref.load %[[TMP_2]][%[[TMP_arg4]]] : memref
-// CHECK:            %[[TMP_13:.*]] = memref.load %[[TMP_3]][%[[TMP_11]]] : memref<32xf32>
-// CHECK:            %[[TMP_14:.*]] = arith.mulf %[[TMP_12]], %[[TMP_13]] : f32
-// CHECK:            scf.reduce(%[[TMP_14]]) : f32 {
-// CHECK:            ^bb0(%[[TMP_arg5:.*]]: f32, %[[TMP_arg6:.*]]: f32):
-// CHECK:              %[[TMP_15:.*]] = arith.addf %[[TMP_arg5]], %[[TMP_arg6]] : f32
-// CHECK:              scf.reduce.return %[[TMP_15]] : f32
-// CHECK:            }
-// CHECK:            scf.yield
-// CHECK:          }
-// CHECK:          memref.store %[[TMP_10]], %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32>
-// CHECK:          scf.yield
-// CHECK:        }
-// CHECK:        %[[TMP_5:.*]] = bufferization.to_tensor %[[TMP_4]] : memref<16xf32>
-// CHECK:        return %[[TMP_5]] : tensor<16xf32>
-func.func @matvec(%arga: tensor<16x32xf32, #CSR>,
-                  %argb: tensor<32xf32>,
-                  %argx: tensor<16xf32>) -> tensor<16xf32> {
-  %0 = linalg.generic #trait_matvec
-    ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>)
-    outs(%argx: tensor<16xf32>) {
-    ^bb(%A: f32, %b: f32, %x: f32):
-      %0 = arith.mulf %A, %b : f32
-      %1 = arith.addf %0, %x : f32
-      linalg.yield %1 : f32
-  } -> tensor<16xf32>
-  return %0 : tensor<16xf32>
-}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
index 459b0e1..c12d2b9 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
@@ -2,14 +2,6 @@
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
-//
-// Do the same run, but now with parallelization.
-//
-// RUN: mlir-opt %s --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \
-// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
-// RUN:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
-// RUN: FileCheck %s
-
 #CSR = #sparse_tensor.encoding<{
   dimLevelType = [ "dense", "compressed" ],
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
index adc0b26..59e7f33 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
@@ -4,16 +4,6 @@
 // RUN:  -e entry -entry-point-result=void \
 // RUN:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
-//
-// Do the same run, but now with parallelization.
-//
-// RUN: mlir-opt %s \
-// RUN:   --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \
-// RUN: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void \
-// RUN:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
-// RUN: FileCheck %s

 !Filename = !llvm.ptr
-- 
2.7.4