std::fill_n(std::back_inserter(blockArgTypes), rank, builder.getIndexType());
// Followed by one value.
blockArgTypes.push_back(rtp.getElementType());
+ // Followed by reduction variable.
+ blockArgTypes.append(initArgs.getTypes().begin(), initArgs.getTypes().end());
SmallVector<Location, 4> blockArgLocs;
- std::fill_n(std::back_inserter(blockArgLocs), rank + 1, tensor.getLoc());
+ std::fill_n(std::back_inserter(blockArgLocs), blockArgTypes.size(),
+ tensor.getLoc());
OpBuilder::InsertionGuard guard(builder);
auto ®ion = *result.regions.front();
return val;
}
+// FIXME:
+// 1. Dense tensors loop should be generated by loop emitter.
+// 2. Support reduction variables to propagate SSA chains properly.
void mlir::sparse_tensor::genDenseTensorOrSparseConstantIterLoop(
OpBuilder &builder, Location loc, Value src, unsigned rank,
function_ref<void(OpBuilder &, Location, Value, ValueRange)> bodyBuilder) {
RankedTensorType cooTp = getUnorderedCOOFromType(dstTp);
auto cooBuffer =
rewriter.create<AllocTensorOp>(loc, cooTp, dstDynSizes).getResult();
- rewriter.create<ForeachOp>(
- loc, srcTensor, llvm::None,
+ ForeachOp foreachOp = rewriter.create<ForeachOp>(
+ loc, srcTensor, cooBuffer,
[&](OpBuilder &builder, Location loc, ValueRange args, Value v,
ValueRange reduc) {
SmallVector<Value, 4> srcIndices;
}
translateIndicesArray(builder, loc, op.getReassociationIndices(),
srcIndices, srcSizes, dstSizes, dstIndices);
- builder.create<InsertOp>(loc, v, cooBuffer, dstIndices);
- builder.create<sparse_tensor::YieldOp>(loc);
+ auto t = builder.create<InsertOp>(loc, v, reduc.front(), dstIndices);
+ builder.create<sparse_tensor::YieldOp>(loc, t);
});
-
- rewriter.replaceOpWithNewOp<ConvertOp>(op, dstTp, cooBuffer);
+ auto t = rewriter.create<LoadOp>(loc, foreachOp.getResult(0), true);
+ rewriter.replaceOpWithNewOp<ConvertOp>(op, dstTp, t);
return success();
}
};
rewriter.create<AllocTensorOp>(loc, cooTp, ValueRange()).getResult();
Value offset = constantIndex(rewriter, loc, 0);
+ ForeachOp foreachOp;
for (Value input : op.getInputs()) {
// Builds the indexing map.
// Build a for op for each input tensor to append new values into the
// output tensor.
- rewriter.create<ForeachOp>(
- loc, input, llvm::None,
+ foreachOp = rewriter.create<ForeachOp>(
+ loc, input, cooBuffer,
[&](OpBuilder &builder, Location loc, ValueRange args, Value v,
ValueRange reduc) {
SmallVector<Value, 4> indices;
idx = builder.create<arith::AddIOp>(loc, idx, offset);
indices.push_back(idx);
}
- builder.create<InsertOp>(loc, v, cooBuffer, indices);
- builder.create<sparse_tensor::YieldOp>(loc);
+ auto t = builder.create<InsertOp>(loc, v, reduc.front(), indices);
+ builder.create<sparse_tensor::YieldOp>(loc, t);
});
// Accumulates the offset. Note that only static-shaped inputs are allowed
// by concatenate op verifier, which saves us from computing the offset
assert(!ShapedType::isDynamic(d));
offset = rewriter.create<arith::AddIOp>(loc, offset,
constantIndex(rewriter, loc, d));
+ cooBuffer = foreachOp.getResult(0);
}
+
+ cooBuffer = rewriter.create<LoadOp>(loc, cooBuffer, true);
rewriter.replaceOpWithNewOp<ConvertOp>(op, rtp, cooBuffer);
return success();
}
srcTp = getUnorderedCOOFromType(srcTp);
tmpCoo =
rewriter.create<AllocTensorOp>(loc, srcTp, dynSrcSizes).getResult();
- rewriter.create<ForeachOp>(
- loc, src, llvm::None,
+ auto foreachOp = rewriter.create<ForeachOp>(
+ loc, src, tmpCoo,
[&](OpBuilder &builder, Location loc, ValueRange args, Value v,
ValueRange reduc) {
SmallVector<Value, 4> indices;
uint64_t dim = toStoredDim(encSrc, i);
indices.push_back(args[dim]);
}
- builder.create<InsertOp>(loc, v, tmpCoo, indices);
- builder.create<sparse_tensor::YieldOp>(loc);
+ auto t = builder.create<InsertOp>(loc, v, reduc.front(), indices);
+ builder.create<sparse_tensor::YieldOp>(loc, t);
});
- src = tmpCoo;
+ src = rewriter.create<LoadOp>(loc, foreachOp.getResult(0), true);
}
// Sort the COO tensor so that its elements are ordered via increasing
getDynamicSizes(dstTp, srcSizes, dynDstSizes);
Value dst =
rewriter.create<AllocTensorOp>(loc, dstTp, dynDstSizes).getResult();
- rewriter.create<ForeachOp>(loc, src, llvm::None,
- [&](OpBuilder &builder, Location loc,
- ValueRange args, Value v, ValueRange reduc) {
- SmallVector<Value, 4> indices;
- for (int64_t i = 0, e = srcTp.getRank(); i < e;
- i++) {
- uint64_t dim = toStoredDim(encDst, i);
- indices.push_back(args[dim]);
- }
- builder.create<InsertOp>(loc, v, dst, indices);
- builder.create<sparse_tensor::YieldOp>(loc);
- });
+ auto foreachOp = rewriter.create<ForeachOp>(
+ loc, src, dst,
+ [&](OpBuilder &builder, Location loc, ValueRange args, Value v,
+ ValueRange reduc) {
+ SmallVector<Value, 4> indices;
+ for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) {
+ uint64_t dim = toStoredDim(encDst, i);
+ indices.push_back(args[dim]);
+ }
+ auto t = builder.create<InsertOp>(loc, v, reduc.front(), indices);
+ builder.create<sparse_tensor::YieldOp>(loc, t);
+ });
- // Release the temporary COO if it is created.
+ // Release the temporary COO if it is created. Note that tmpCoo is
+ // invalidated due to foreach and updated to src.
if (tmpCoo)
- rewriter.create<DeallocTensorOp>(loc, tmpCoo);
+ rewriter.create<DeallocTensorOp>(loc, src);
// Directly replace op with dst results in bufferization error message
// "sparse tensor allocation should not escape function".
// As such, we insert a trivial tensor convert which will be removed by
// codegen.
rewriter.setInsertionPointAfter(op);
- rewriter.replaceOpWithNewOp<ConvertOp>(op, dstTp, dst);
+ auto t = rewriter.create<LoadOp>(loc, foreachOp.getResult(0), true);
+ rewriter.replaceOpWithNewOp<ConvertOp>(op, dstTp, t);
return success();
}
};
int64_t rank = rtp.getRank();
auto enc = getSparseTensorEncoding(rtp);
+ SmallVector<Value> reduc = op.getInitArgs();
+
// 1. Generates loop for the sparse input.
SparseTensorLoopEmitter loopEmitter(ValueRange{input});
loopEmitter.initializeLoopEmit(rewriter, loc);
// TODO: provide utility function for loop sequences that only contains
// one for loop?
loopEmitter.enterNewLoopSeq(rewriter, loc, 0, static_cast<size_t>(i));
- loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i);
+ // Note that reduc will be taken care of by loop emitter and get updated
+ // in place.
+ loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i, reduc);
}
SmallVector<Value, 4> coords;
: rewriter.create<memref::LoadOp>(loc, vals, coords);
// 2. Inline the block in the foreach operator.
- Block::iterator inlinePos = rewriter.getInsertionPoint();
Block *srcBlock = op.getBody();
- // Remove sparse_tensor.yield.
- rewriter.eraseOp(srcBlock->getTerminator());
-
- for (int64_t i = 0; i < rank; i++) {
- loopEmitter.exitCurrentLoop(rewriter, loc);
- loopEmitter.exitCurrentLoopSeq();
- }
SmallVector<Value, 4> args;
// Remap coordinates.
}
// Remap value.
args.push_back(val);
+ // Remap reduction variables.
+ args.append(reduc);
+
+ // Remove sparse_tensor.yield.
+ SmallVector<Value> reducValue = srcBlock->getTerminator()->getOperands();
+ rewriter.eraseOp(srcBlock->getTerminator());
// Inline body.
- rewriter.mergeBlockBefore(srcBlock, &*inlinePos, args);
- // delete the foreach operator.
- rewriter.eraseOp(op);
+ if (!reducValue.empty()) {
+ rewriter.mergeBlocks(srcBlock, rewriter.getBlock(), args);
+ } else {
+ // This is annoying, since scf.for inserts a implicit yield op when
+ // there is no reduction variable upon creation, in this case we need to
+ // merge the block *before* the yield op.
+ rewriter.mergeBlockBefore(srcBlock, &*rewriter.getInsertionPoint(), args);
+ }
+
+ for (int64_t i = 0; i < rank; i++) {
+ // Link the reduction chain. Note that loop emitter update the reducValue
+ // in place.
+ loopEmitter.exitCurrentLoop(rewriter, loc, reducValue);
+ loopEmitter.exitCurrentLoopSeq();
+ }
+
+ // Replace the foreach operator with the value returned by the outtermost
+ // for loop.
+ rewriter.replaceOp(op, reducValue);
return success();
}
};
.getResult(0);
Type eltTp = dstTp.getElementType();
Value value = genAllocaScalar(rewriter, loc, eltTp);
- scf::ForOp forOp = rewriter.create<scf::ForOp>(loc, c0, nnz, c1);
+ scf::ForOp forOp = rewriter.create<scf::ForOp>(loc, c0, nnz, c1,
+ ArrayRef<Value>(cooBuffer));
rewriter.setInsertionPointToStart(forOp.getBody());
SmallString<18> getNextFuncName{"getSparseTensorReaderNext",
loc, indices, constantIndex(rewriter, loc, i)));
}
Value v = rewriter.create<memref::LoadOp>(loc, value);
- rewriter.create<InsertOp>(loc, v, cooBuffer, indicesArray);
+ auto t = rewriter.create<InsertOp>(loc, v, forOp.getRegionIterArg(0),
+ indicesArray);
+ rewriter.create<scf::YieldOp>(loc, ArrayRef<Value>(t));
rewriter.setInsertionPointAfter(forOp);
+ // Link SSA chain.
+ cooBuffer = forOp.getResult(0);
// Release the sparse tensor reader.
createFuncCall(rewriter, loc, "delSparseTensorReader", {}, {reader},
EmitCInterface::Off);
-
+ cooBuffer = rewriter.create<LoadOp>(loc, cooBuffer, true);
Value newOp = rewriter.replaceOpWithNewOp<ConvertOp>(op, dstTp, cooBuffer);
// Release the unordered COO tensor buffer.
// CHECK-RWT: %[[V:.*]] = tensor.extract %[[A]]{{\[}}%[[FI]], %[[FJ]]] : tensor<2x4xf64>
// CHECK-RWT: %[[NZ:.*]] = arith.cmpf une, %[[V]], %[[F0]] : f64
// CHECK-RWT: scf.if %[[NZ]] {
+// // FIXME: the SSA chain is broken here!
// CHECK-RWT: %{{.*}} = sparse_tensor.insert %[[V]] into %[[COO]]{{\[}}%[[FI]], %[[FJ]]]
// CHECK-RWT: }
// CHECK-RWT: }
// CHECK-RWT: %[[V2:.*]] = sparse_tensor.values %[[COO]]
// CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[I0]], %[[I1]] jointly %[[V2]]
// CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor()
-// CHECK-RWT: sparse_tensor.foreach in %[[COO]]
-// CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64):
-// CHECK-RWT: sparse_tensor.insert %[[FV]] into %[[DST]]{{\[}}%[[FI0]], %[[FI1]]]
+// CHECK-RWT: %[[NEW_T:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[DST]])
+// CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64, %[[R0:.*]]: tensor
+// CHECK-RWT: %[[RET:.*]] = sparse_tensor.insert %[[FV]] into %[[R0]]{{\[}}%[[FI0]], %[[FI1]]]
+// CHECK-RWT: sparse_tensor.yield %[[RET]]
// CHECK-RWT: }
-// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]]
+// CHECK-RWT: %[[NT:.*]] = sparse_tensor.load %[[NEW_T]] hasInserts
+// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[NT]]
// CHECK-RWT: bufferization.dealloc_tensor %[[COO]]
// CHECK-RWT: return %[[R]] : tensor<2x4xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>
func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> {
// CHECK-RWT: %[[I1r:.*]] = tensor.extract %[[SI]]{{\[}}%[[FI]], %[[C1]]] : tensor<2x2xi64>
// CHECK-RWT: %[[I1:.*]] = arith.index_cast %[[I1r]] : i64 to index
// CHECK-RWT: %[[V:.*]] = tensor.extract %[[SV]]{{\[}}%[[FI]]] : tensor<2xf32>
+// // FIXME: the SSA chain is broken here!
// CHECK-RWT: sparse_tensor.insert %[[V]] into %[[COO]]{{\[}}%[[I0]], %[[I1]]]
// CHECK-RWT: }
// CHECK-RWT: %[[TI0:.*]] = sparse_tensor.indices %[[COO]] {dimension = 0 : index}
// CHECK-RWT: %[[TV:.*]] = sparse_tensor.values %[[COO]]
// CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[TI0]], %[[TI1]] jointly %[[TV]]
// CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor()
-// CHECK-RWT: sparse_tensor.foreach in %[[COO]]
-// CHECK-RWT: ^bb0(%[[F2I0:.*]]: index, %[[F2I1:.*]]: index, %[[F2V:.*]]: f32):
-// CHECK-RWT: sparse_tensor.insert %[[F2V]] into %[[DST]]{{\[}}%[[F2I0]], %[[F2I1]]]
+// CHECK-RWT: %[[RET:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[DST]])
+// CHECK-RWT: ^bb0(%[[F2I0:.*]]: index, %[[F2I1:.*]]: index, %[[F2V:.*]]: f32, %[[R0:.*]]: tensor
+// CHECK-RWT: %[[NEW_T:.*]] = sparse_tensor.insert %[[F2V]] into %[[R0]]{{\[}}%[[F2I0]], %[[F2I1]]]
+// CHECK-RWT: sparse_tensor.yield %[[NEW_T]]
// CHECK-RWT: }
-// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]]
+// CHECK-RWT: %[[T:.*]] = sparse_tensor.load %[[RET]] hasInserts
+// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[T]]
// CHECK-RWT: bufferization.dealloc_tensor %[[COO]]
// CHECK-RWT: return %[[R]] : tensor<8x7xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>
func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{
// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[A]]
// CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[I0]] jointly %[[V]]
// CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor(%[[D]])
-// CHECK-RWT: sparse_tensor.foreach in %[[A]]
-// CHECK-RWT: ^bb0(%[[FI2:.*]]: index, %[[FV2:.*]]: f32):
-// CHECK-RWT: sparse_tensor.insert %[[FV2]] into %[[DST]]{{\[}}%[[FI2]]]
+// CHECK-RWT: %[[RET:.*]] = sparse_tensor.foreach in %[[A]] init(%[[DST]])
+// CHECK-RWT: ^bb0(%[[FI2:.*]]: index, %[[FV2:.*]]: f32, %[[T:.*]]: tensor<?xf32,
+// CHECK-RWT: %[[I:.*]] = sparse_tensor.insert %[[FV2]] into %[[T]]{{\[}}%[[FI2]]]
+// CHECK-RWT: sparse_tensor.yield %[[I]]
// CHECK-RWT: }
-// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]]
+// CHECK-RWT: %[[T:.*]] = sparse_tensor.load %[[RET]] hasInserts
+// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[T]]
// CHECK-RWT: return %[[R]] : tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 32, indexBitWidth = 32 }>>
func.func @sparse_convert(%arg0: tensor<?xf32, #SparseVector64>) -> tensor<?xf32, #SparseVector32> {
%0 = sparse_tensor.convert %arg0 : tensor<?xf32, #SparseVector64> to tensor<?xf32, #SparseVector32>
// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]])
// CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]])
// CHECK: %[[VB:.*]] = memref.alloca()
-// CHECK: scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] {
+// CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]])
// CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]])
// CHECK: %[[E0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]]
// CHECK: %[[E1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]]
// CHECK: %[[V:.*]] = memref.load %[[VB]][]
-// CHECK: sparse_tensor.insert %[[V]] into %[[T]]{{\[}}%[[E0]], %[[E1]]]
+// CHECK: %[[T1:.*]] = sparse_tensor.insert %[[V]] into %[[A2]]{{\[}}%[[E0]], %[[E1]]]
+// CHECK: scf.yield %[[T1]]
// CHECK: }
// CHECK: call @delSparseTensorReader(%[[R]])
-// CHECK: %[[R:.*]] = sparse_tensor.convert %[[T]]
-// CHECK: bufferization.dealloc_tensor %[[T]]
+// CHECK: %[[T3:.*]] = sparse_tensor.load %[[T2]] hasInserts
+// CHECK: %[[R:.*]] = sparse_tensor.convert %[[T3]]
+// CHECK: bufferization.dealloc_tensor %[[T3]]
// CHECK: return %[[R]]
-// CHECK: }
func.func @sparse_new(%arg0: !llvm.ptr<i8>) -> tensor<?x?xf32, #CSR> {
%0 = sparse_tensor.new %arg0 : !llvm.ptr<i8> to tensor<?x?xf32, #CSR>
return %0 : tensor<?x?xf32, #CSR>
// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor
// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref<?xindex>
-// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] {
+// CHECK: %[[RET_1:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] iter_args(%[[A0:.*]] = %[[TMP_0]])
// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref<?xindex>
-// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
+// CHECK: %[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]])
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref<?xf64>
-// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: %[[NEW_1:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: scf.yield %[[NEW_1]]
// CHECK: }
+// CHECK: scf.yield %[[RET_4]]
// CHECK: }
// CHECK: %[[TMP_8:.*]] = sparse_tensor.pointers %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor
// CHECK: %[[TMP_9:.*]] = sparse_tensor.indices %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor
// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor
// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<?xindex>
-// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] {
+// CHECK: %[[RET_2:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] iter_args(%[[A2:.*]] = %[[RET_1]])
// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref<?xindex>
-// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
+// CHECK: %[[RET_5:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A3:.*]] = %[[A2]])
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index
-// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: %[[NEW_2:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: scf.yield %[[NEW_2]]
// CHECK: }
+// CHECK: scf.yield %[[RET_5]]
// CHECK: }
// CHECK: %[[TMP_15:.*]] = sparse_tensor.pointers %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor
// CHECK: %[[TMP_16:.*]] = sparse_tensor.indices %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor
// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor
// CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref<?xindex>
-// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] {
+// CHECK: %[[RET_3:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] iter_args(%[[A4:.*]] = %[[RET_2]])
// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref<?xindex>
-// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
+// CHECK: %[[RET_6:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A5:.*]] = %[[A4]])
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index
-// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: %[[NEW_3:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: scf.yield %[[NEW_3]]
// CHECK: }
+// CHECK: scf.yield %[[RET_6]]
// CHECK: }
-// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_0]] : tensor<9x4xf64, #sparse_tensor
+// CHECK: %[[TMP_23:.*]] = sparse_tensor.load %[[RET_3]] hasInserts
+// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_23]] : tensor<9x4xf64, #sparse_tensor
// CHECK: return %[[TMP_22]] : tensor<9x4xf64, #sparse_tensor
func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>,
%arg1: tensor<3x4xf64, #DCSR>,
// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]]
// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref<?xindex>
// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref<?xindex>
-// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] {
+// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R:.*]] = %[[B]])
// CHECK-RWT: %[[SI:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref<?xindex>
// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref<?xf64>
// CHECK-RWT: %[[DI0:.*]] = arith.divui %[[SI]], %[[C10]] : index
// CHECK-RWT: %[[DI1:.*]] = arith.remui %[[SI]], %[[C10]] : index
-// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI0]], %[[DI1]]]
+// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]]
+// CHECK-RWT: scf.yield %[[NT:.*]]
// CHECK-RWT: }
-// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]]
+// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
+// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]]
// CHECK-RWT: return %[[T]] : tensor<10x10xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>>
//
func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10xf64, #SparseMatrix> {
// CHECK-RWT: %[[B:.*]] = bufferization.alloc_tensor()
// CHECK-RWT: %[[P0:.*]] = sparse_tensor.pointers %[[S]] {dimension = 0 : index}
// CHECK-RWT: %[[I0:.*]] = sparse_tensor.indices %[[S]] {dimension = 0 : index}
-// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index}
-// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index}
-// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]]
-// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref<?xindex>
-// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref<?xindex>
-// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] {
-// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref<?xindex>
-// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref<?xindex>
-// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index
-// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref<?xindex>
-// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] {
-// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref<?xindex>
-// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref<?xf64>
-// CHECK-RWT: %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index
-// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index
-// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI]]]
-// CHECK-RWT }
-// CHECK-RWT: }
-// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]]
+// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index}
+// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index}
+// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]]
+// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref<?xindex>
+// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref<?xindex>
+// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[A0:.*]] = %[[B]])
+// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref<?xindex>
+// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref<?xindex>
+// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index
+// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref<?xindex>
+// CHECK-RWT: %[[RET_1:.*]] = scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] iter_args(%[[A1:.*]] = %[[A0]])
+// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref<?xindex>
+// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref<?xf64>
+// CHECK-RWT: %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index
+// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index
+// CHECK-RWT: %[[R1:.*]] = sparse_tensor.insert %[[SV]] into %[[A1]]{{\[}}%[[DI]]]
+// CHECK-RWT scf.yield %[[R1]]
+// CHECK-RWT }
+// CHECK-RWT scf.yield %[[RET_1]]
+// CHECK-RWT: }
+// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
+// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]]
// CHECK-RWT: return %[[T]] : tensor<100xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>>
//
func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<100xf64, #SparseVector> {
// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]]
// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref<?xindex>
// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref<?xindex>
-// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] {
+// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R:.*]] = %[[B]])
// CHECK-RWT: %[[SI:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref<?xindex>
// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref<?xf64>
// CHECK-RWT: %[[T1:.*]] = arith.muli %[[DD0]], %[[C10]] : index
// CHECK-RWT: %[[T3:.*]] = arith.remui %[[SI]], %[[T2]] : index
// CHECK-RWT: %[[T4:.*]] = arith.divui %[[T2]], %[[C10]] : index
// CHECK-RWT: %[[DI1:.*]] = arith.divui %[[T3]], %[[T4]] : index
-// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI0]], %[[DI1]]]
+// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]]
+// CHECK-RWT: scf.yield %[[NT]]
// CHECK-RWT: }
-// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]]
+// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
+// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]]
// CHECK-RWT: return %[[T]] : tensor<?x10xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>>
//
func.func @dynamic_sparse_expand(%arg0: tensor<?xf64, #SparseVector>) -> tensor<?x10xf64, #SparseMatrix> {
// CHECK-RWT: %[[B:.*]] = bufferization.alloc_tensor(%[[DD0]])
// CHECK-RWT: %[[P0:.*]] = sparse_tensor.pointers %[[S]] {dimension = 0 : index}
// CHECK-RWT: %[[I0:.*]] = sparse_tensor.indices %[[S]] {dimension = 0 : index}
-// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index}
-// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index}
-// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]]
-// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref<?xindex>
-// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref<?xindex>
-// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] {
-// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref<?xindex>
-// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref<?xindex>
-// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index
-// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref<?xindex>
-// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] {
-// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref<?xindex>
-// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref<?xf64>
-// CHECK-RWT: %[[T1:.*]] = arith.divui %[[DD0]], %[[C10]] : index
-// CHECK-RWT: %[[T2:.*]] = arith.muli %[[SI0]], %[[T1]] : index
-// CHECK-RWT: %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index
-// CHECK-RWT: %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index
-// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index
-// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI]]]
-// CHECK-RWT }
-// CHECK-RWT: }
-// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]]
+// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index}
+// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index}
+// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]]
+// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref<?xindex>
+// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref<?xindex>
+// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R0:.*]] = %[[B]])
+// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref<?xindex>
+// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref<?xindex>
+// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index
+// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref<?xindex>
+// CHECK-RWT: %[[RET_1:.*]] = scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] iter_args(%[[R1:.*]] = %[[R0]])
+// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref<?xindex>
+// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref<?xf64>
+// CHECK-RWT: %[[T1:.*]] = arith.divui %[[DD0]], %[[C10]] : index
+// CHECK-RWT: %[[T2:.*]] = arith.muli %[[SI0]], %[[T1]] : index
+// CHECK-RWT: %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index
+// CHECK-RWT: %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index
+// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index
+// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R1]]{{\[}}%[[DI]]]
+// CHECK-RWT scf.yield %[[NT]]
+// CHECK-RWT }
+// CHECK-RWT scf.yield %[[RET_1]]
+// CHECK-RWT: }
+// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
+// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]]
// CHECK-RWT: return %[[T]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>>
//
func.func @dynamic_sparse_collapse(%arg0: tensor<10x?xf64, #SparseMatrix>) -> tensor<?xf64, #SparseVector> {