GPU_SparseSpMatHandle:$spmatA,
GPU_SparseDnVecHandle:$dnX,
GPU_SparseDnVecHandle:$dnY,
- OptionalAttr<TypeAttr>:$computeType);
+ TypeAttr:$computeType);
let results = (outs Res<Index>:$bufferSz,
Optional<GPU_AsyncToken>:$asyncToken);
"Value":$env,
"Value":$spmatA,
"Value":$dnX,
- "Value":$dnY)
+ "Value":$dnY,
+ "Type":$computeType)
, [{
auto modeA = gpu::TransposeMode::NON_TRANSPOSE;
return build($_builder, $_state, bufferSz, asyncToken, asyncDependencies,
- env, modeA, spmatA, dnX, dnY, {});}]>
+ env, modeA, spmatA, dnX, dnY, computeType);}]>
];
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
- $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnX `,` $dnY attr-dict ( `into` $computeType^)?
+ $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnX `,` $dnY attr-dict `into` $computeType
}];
}
GPU_SparseSpMatHandle:$spmatA,
GPU_SparseDnVecHandle:$dnX,
GPU_SparseDnVecHandle:$dnY,
- OptionalAttr<TypeAttr>:$computeType,
+ TypeAttr:$computeType,
AnyMemRef:$buffer);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
"Value":$spmatA,
"Value":$dnX,
"Value":$dnY,
+ "Type":$computeType,
"Value":$buffer), [{
auto modeA = gpu::TransposeMode::NON_TRANSPOSE;
return build($_builder, $_state, asyncToken, asyncDependencies, env, modeA,
- spmatA, dnX, dnY, {}, buffer);}]>
+ spmatA, dnX, dnY, computeType, buffer);}]>
];
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
- $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnX `,` $dnY `,` $buffer attr-dict `:` type($buffer) ( `into` $computeType^)?
+ $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnX `,` $dnY `,` $buffer attr-dict `:` type($buffer) `into` $computeType
}];
}
GPU_SparseSpMatHandle:$spmatA,
GPU_SparseDnMatHandle:$dnmatB,
GPU_SparseDnMatHandle:$dnmatC,
- OptionalAttr<TypeAttr>:$computeType);
+ TypeAttr:$computeType);
let results = (outs Res<Index>:$bufferSz,
Optional<GPU_AsyncToken>:$asyncToken);
"Value":$env,
"Value":$spmatA,
"Value":$dnmatB,
- "Value":$dnmatC), [{
+ "Value":$dnmatC,
+ "Type":$computeType), [{
auto modeA = gpu::TransposeMode::NON_TRANSPOSE;
auto modeB = gpu::TransposeMode::NON_TRANSPOSE;
return build($_builder, $_state, bufferSz, asyncToken, asyncDependencies,
- env, modeA, modeB, spmatA, dnmatB, dnmatC, {});}]>
+ env, modeA, modeB, spmatA, dnmatB, dnmatC, computeType);}]>
];
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
- $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $dnmatC attr-dict ( `into` $computeType^)?
+ $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $dnmatC attr-dict `into` $computeType
}];
}
GPU_SparseSpMatHandle:$spmatA,
GPU_SparseDnMatHandle:$dnmatB,
GPU_SparseDnMatHandle:$dnmatC,
- OptionalAttr<TypeAttr>:$computeType,
+ TypeAttr:$computeType,
AnyMemRef:$buffer);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
"Value":$spmatA,
"Value":$dnmatB,
"Value":$dnmatC,
+ "Type":$computeType,
"Value":$buffer), [{
auto modeA = gpu::TransposeMode::NON_TRANSPOSE;
auto modeB = gpu::TransposeMode::NON_TRANSPOSE;
return build($_builder, $_state, asyncToken, asyncDependencies, env, modeA,
- modeB, spmatA, dnmatB, dnmatC, {}, buffer);}]>
+ modeB, spmatA, dnmatB, dnmatC, computeType, buffer);}]>
];
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
- $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $dnmatC `,` $buffer attr-dict `:` type($buffer) ( `into` $computeType^)?
+ $env `,` $spmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $dnmatC `,` $buffer attr-dict `:` type($buffer) `into` $computeType
}];
}
GPU_SparseDnMatHandle:$dnmatA,
GPU_SparseDnMatHandle:$dnmatB,
GPU_SparseSpMatHandle:$spmatC,
- OptionalAttr<TypeAttr>:$computeType);
+ TypeAttr:$computeType);
let results = (outs Res<Index>:$bufferSz, Optional<GPU_AsyncToken>:$asyncToken);
let builders = [OpBuilder<(ins
- "::mlir::Type":$bufferSz,
- "::mlir::Type":$asyncToken,
- "::mlir::ValueRange":$asyncDependencies,
- "::mlir::Value":$env,
- "::mlir::Value":$dnmatA,
- "::mlir::Value":$dnmatB,
- "::mlir::Value":$spmatC), [{
+ "Type":$bufferSz,
+ "Type":$asyncToken,
+ "ValueRange":$asyncDependencies,
+ "Value":$env,
+ "Value":$dnmatA,
+ "Value":$dnmatB,
+ "Value":$spmatC,
+ "Type":$computeType), [{
auto modeA = gpu::TransposeMode::NON_TRANSPOSE;
auto modeB = gpu::TransposeMode::NON_TRANSPOSE;
return build($_builder, $_state, bufferSz, asyncToken, asyncDependencies,
- env, modeA, modeB, dnmatA, dnmatB, spmatC, {});}]>
+ env, modeA, modeB, dnmatA, dnmatB, spmatC, computeType);}]>
];
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
- $env `,` $dnmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $spmatC attr-dict ( `into` $computeType^)?
+ $env `,` $dnmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $spmatC attr-dict `into` $computeType
}];
}
GPU_SparseDnMatHandle:$dnmatA,
GPU_SparseDnMatHandle:$dnmatB,
GPU_SparseSpMatHandle:$spmatC,
- OptionalAttr<TypeAttr>:$computeType,
+ TypeAttr:$computeType,
AnyMemRef:$buffer);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
let builders = [OpBuilder<(ins
- "::mlir::Type":$asyncToken,
- "::mlir::ValueRange":$asyncDependencies,
- "::mlir::Value":$env,
- "::mlir::Value":$dnmatA,
- "::mlir::Value":$dnmatB,
- "::mlir::Value":$spmatC,
- "::mlir::Value":$buffer), [{
+ "Type":$asyncToken,
+ "ValueRange":$asyncDependencies,
+ "Value":$env,
+ "Value":$dnmatA,
+ "Value":$dnmatB,
+ "Value":$spmatC,
+ "Type":$computeType,
+ "Value":$buffer), [{
auto modeA = gpu::TransposeMode::NON_TRANSPOSE;
auto modeB = gpu::TransposeMode::NON_TRANSPOSE;
return build($_builder, $_state, asyncToken, asyncDependencies, env, modeA,
- modeB, dnmatA, dnmatB, spmatC, {}, buffer);}]>
+ modeB, dnmatA, dnmatB, spmatC, computeType, buffer);}]>
];
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
- $env `,` $dnmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $spmatC `,` $buffer attr-dict `:` type($buffer) ( `into` $computeType^)?
+ $env `,` $dnmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $spmatC `,` $buffer attr-dict `:` type($buffer) `into` $computeType
}];
}
return success();
}
-// Returns the element type of the defining spmat op.
-// TODO: safer and more flexible to store data type in actual op instead?
-static Type getSpMatElemType(Value spMat) {
- if (auto op = spMat.getDefiningOp<gpu::CreateCooOp>())
- return llvm::cast<MemRefType>(op.getValues().getType()).getElementType();
- if (auto op = spMat.getDefiningOp<gpu::CreateCsrOp>())
- return llvm::cast<MemRefType>(op.getValues().getType()).getElementType();
- llvm_unreachable("cannot find spmat def");
-}
-
-// Returns the element type of the defining dnmat or dnvec op.
-static Type getDnElemType(Value dn) {
- if (auto op = dn.getDefiningOp<gpu::CreateDnMatOp>())
- return op.getMemref().getType().getElementType();
- if (auto op = dn.getDefiningOp<gpu::CreateDnVecOp>())
- return op.getMemref().getType().getElementType();
- llvm_unreachable("cannot find dn def");
-}
-
template <typename T>
static Value genConstInt32From(OpBuilder &builder, Location loc, T TValue) {
Type llvmInt32Type = builder.getIntegerType(32);
static_cast<int32_t>(TValue));
}
-static Value
-genConstInt32FromOptionalComputeMode(OpBuilder &builder, Location loc,
- std::optional<Type> computeTypeOptional,
- Type defaultType) {
- auto computeTypeInt =
- getCuSparseDataTypeFrom(computeTypeOptional.value_or(defaultType));
- auto computeType = genConstInt32From(builder, loc, computeTypeInt);
- return computeType;
+static Value genConstInt32FromComputeMode(OpBuilder &builder, Location loc,
+ Type computeType) {
+ auto computeTypeInt = getCuSparseDataTypeFrom(computeType);
+ auto computeTypeConst = genConstInt32From(builder, loc, computeTypeInt);
+ return computeTypeConst;
}
LogicalResult ConvertCreateSparseEnvOpToGpuRuntimeCallPattern::matchAndRewrite(
return failure();
Location loc = op.getLoc();
auto modeA = genConstInt32From(rewriter, loc, op.getModeA());
- // retrieve the compute type, notice that it may be optional
- auto computeType = genConstInt32FromOptionalComputeMode(
- rewriter, loc, adaptor.getComputeType(), getDnElemType(op.getDnY()));
+ auto computeType =
+ genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType());
auto stream = adaptor.getAsyncDependencies().front();
auto bufferSize =
spMVBufferSizeCallBuilder
return failure();
Location loc = op.getLoc();
auto modeA = genConstInt32From(rewriter, loc, adaptor.getModeA());
- // retrieve the compute type, notice that it may be optional
- auto computeType = genConstInt32FromOptionalComputeMode(
- rewriter, loc, adaptor.getComputeType(), getDnElemType(op.getDnY()));
+ auto computeType =
+ genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType());
auto stream = adaptor.getAsyncDependencies().front();
Value pBuf =
MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc);
auto modeA = genConstInt32From(rewriter, loc, adaptor.getModeA());
auto modeB = genConstInt32From(rewriter, loc, adaptor.getModeB());
auto stream = adaptor.getAsyncDependencies().front();
- // retrieve the compute type, notice that it may be optional
- auto computeType = genConstInt32FromOptionalComputeMode(
- rewriter, loc, adaptor.getComputeType(), getDnElemType(op.getDnmatC()));
+ auto computeType =
+ genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType());
auto bufferSize = spMMBufferSizeCallBuilder
.create(loc, rewriter,
Location loc = op.getLoc();
auto modeA = genConstInt32From(rewriter, loc, adaptor.getModeA());
auto modeB = genConstInt32From(rewriter, loc, adaptor.getModeB());
- auto computeType = genConstInt32FromOptionalComputeMode(
- rewriter, loc, adaptor.getComputeType(),
- getSpMatElemType(op.getSpmatC()));
+ auto computeType =
+ genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType());
auto stream = adaptor.getAsyncDependencies().front();
auto bufferSize = SDDMMBufferSizeCallBuilder
.create(loc, rewriter,
Location loc = op.getLoc();
auto modeA = genConstInt32From(rewriter, loc, adaptor.getModeA());
auto modeB = genConstInt32From(rewriter, loc, adaptor.getModeB());
- // retrieve the compute type, notice that it may be optional
- auto computeType = genConstInt32FromOptionalComputeMode(
- rewriter, loc, adaptor.getComputeType(), getDnElemType(op.getDnmatC()));
+ auto computeType =
+ genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType());
auto stream = adaptor.getAsyncDependencies().front();
Value pBuf =
failed(isAsyncWithOneDependency(rewriter, op)))
return failure();
Location loc = op.getLoc();
- auto computeType = genConstInt32FromOptionalComputeMode(
- rewriter, loc, adaptor.getComputeType(),
- getSpMatElemType(op.getSpmatC()));
+ auto computeType =
+ genConstInt32FromComputeMode(rewriter, loc, adaptor.getComputeType());
auto modeA = genConstInt32From(rewriter, loc, adaptor.getModeA());
auto modeB = genConstInt32From(rewriter, loc, adaptor.getModeB());
auto stream = adaptor.getAsyncDependencies().front();
Value dnY = dvecY.getResult(0);
token = dvecY.getAsyncToken();
+ auto dnYType = llvm::cast<ShapedType>(y.getType()).getElementType();
+
// Precompute buffersize for SpMV.
auto bufferComp = rewriter.create<gpu::SpMVBufferSizeOp>(
- loc, indexTp, tokenTp, token, handle, spMatA, dnX, dnY);
+ loc, indexTp, tokenTp, token, handle, spMatA, dnX, dnY,
+ /*computeType=*/dnYType);
Value bufferSz = bufferComp.getResult(0);
token = bufferComp.getAsyncToken();
auto buf = genAllocBuffer(rewriter, loc, bufferSz, token);
token = buf.getAsyncToken();
// Perform the SpMV.
- auto spmvComp = rewriter.create<gpu::SpMVOp>(loc, tokenTp, token, handle,
- spMatA, dnX, dnY, buffer);
+ auto spmvComp =
+ rewriter.create<gpu::SpMVOp>(loc, tokenTp, token, handle, spMatA, dnX,
+ dnY, /*computeType=*/dnYType, buffer);
token = spmvComp.getAsyncToken();
// Copy data back to host and free all the resoures.
Value dnC = dmatC.getResult(0);
token = dmatC.getAsyncToken();
+ auto dmatCType = llvm::cast<ShapedType>(c.getType()).getElementType();
+
// Precompute buffersize for SpMM.
auto bufferComp = rewriter.create<gpu::SpMMBufferSizeOp>(
- loc, indexTp, tokenTp, token, handle, spMatA, dnB, dnC);
+ loc, indexTp, tokenTp, token, handle, spMatA, dnB, dnC,
+ /*computeType=*/dmatCType);
Value bufferSz = bufferComp.getResult(0);
token = bufferComp.getAsyncToken();
auto buf = genAllocBuffer(rewriter, loc, bufferSz, token);
Value buffer = buf.getResult(0);
token = buf.getAsyncToken();
+ auto dnCType = llvm::cast<ShapedType>(c.getType()).getElementType();
+
// Perform the SpMM.
- auto spmmComp = rewriter.create<gpu::SpMMOp>(loc, tokenTp, token, handle,
- spMatA, dnB, dnC, buffer);
+ auto spmmComp =
+ rewriter.create<gpu::SpMMOp>(loc, tokenTp, token, handle, spMatA, dnB,
+ dnC, /*computeType=*/dnCType, buffer);
token = spmmComp.getAsyncToken();
// Copy data back to host and free all the resoures.
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_coo async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnvec, %token5 = gpu.create_dn_vec async [%token4] %mem2, %arg0 : memref<?xf64>
- %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec
- %token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64>
+ %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec into f64
+ %token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_vec async [%token8] %dnvec
%token10 = gpu.destroy_sparse_env async [%token9] %env
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnmat, %token5 = gpu.create_dn_mat async [%token4] %arg0, %arg0, %mem2 : memref<?xf64>
- %bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %env, %spmat, %dnmat, %dnmat
- %token7 = gpu.spmm async [%token6] %env, %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64>
+ %bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %env, %spmat, %dnmat, %dnmat into f64
+ %token7 = gpu.spmm async [%token6] %env, %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_mat async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnmat, %token5 = gpu.create_dn_mat async [%token4] %arg0, %arg0, %mem2 : memref<?xf64>
- %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat
- %token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64>
+ %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat into f64
+ %token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_mat async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
// CHECK: gpu.create_dn_vec async
%dnvec, %token6 = gpu.create_dn_vec async [%token5] %mem2, %arg0 : memref<?xf64>
// CHECK: gpu.spmv_buffer_size async
- %bufferSz, %token7 = gpu.spmv_buffer_size async [%token6] %env, %spmat, %dnvec, %dnvec
+ %bufferSz, %token7 = gpu.spmv_buffer_size async [%token6] %env, %spmat, %dnvec, %dnvec into f64
// CHECK: gpu.spmv async
- %token8 = gpu.spmv async [%token7] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64>
+ %token8 = gpu.spmv async [%token7] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
// CHECK: gpu.create_dn_mat async
%dnmat, %token9 = gpu.create_dn_mat async [%token8] %arg0, %arg0, %mem2 : memref<?xf64>
// CHECK: gpu.spmm_buffer_size async
- %bufferSz2, %token10 = gpu.spmm_buffer_size async [%token9] %env, %spmat, %dnmat, %dnmat
+ %bufferSz2, %token10 = gpu.spmm_buffer_size async [%token9] %env, %spmat, %dnmat, %dnmat into f64
// CHECK: gpu.spmm async
- %token11 = gpu.spmm async [%token10] %env, %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64>
+ %token11 = gpu.spmm async [%token10] %env, %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
// CHECK: gpu.sddmm_buffer_size async
- %bufferSz3, %token12 = gpu.sddmm_buffer_size async [%token11] %env, %dnmat, %dnmat, %spmat
+ %bufferSz3, %token12 = gpu.sddmm_buffer_size async [%token11] %env, %dnmat, %dnmat, %spmat into f64
// CHECK: gpu.sddmm async
- %token13 = gpu.sddmm async [%token12] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64>
+ %token13 = gpu.sddmm async [%token12] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
// CHECK: gpu.destroy_dn_mat async
%token14 = gpu.destroy_dn_mat async [%token13] %dnmat
// CHECK: gpu.destroy_sp_mat async
// CHECK: %{{.*}}, %{{.*}} = gpu.create_sparse_env async [%{{.*}}]
// CHECK: %{{.*}}, %{{.*}} = gpu.create_coo async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_vec async [%{{.*}}] %{{.*}}, %{{.*}} : memref<?xf64>
- // CHECK: %{{.*}}, %{{.*}} = gpu.spmv_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}
- // CHECK: %{{.*}} = gpu.spmv async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.spmv_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} into f64
+ // CHECK: %{{.*}} = gpu.spmv async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
// CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_dn_vec async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_sparse_env async [%{{.*}}] %{{.*}}
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_coo async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnvec, %token5 = gpu.create_dn_vec async [%token4] %mem2, %arg0 : memref<?xf64>
- %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec
- %token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64>
+ %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec into f64
+ %token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_vec async [%token8] %dnvec
%token10 = gpu.destroy_sparse_env async [%token9] %env
// CHECK: %{{.*}}, %{{.*}} = gpu.create_sparse_env async [%{{.*}}]
// CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_mat async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64>
- // CHECK: %{{.*}}, %{{.*}} = gpu.sddmm_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}
- // CHECK: %{{.*}} = gpu.sddmm async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.sddmm_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} into f64
+ // CHECK: %{{.*}} = gpu.sddmm async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
// CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_dn_mat async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_sparse_env async [%{{.*}}] %{{.*}}
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnmat, %token5 = gpu.create_dn_mat async [%token4] %arg0, %arg0, %mem2 : memref<?xf64>
- %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat
- %token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64>
+ %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat into f64
+ %token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_mat async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env