"dialect"),
init(false)};
+ /// These options are used to enable GPU code generation.
+ /// Leaving "gpu-triple" unset keeps the pipeline CPU-only: the pipeline
+ /// builder only adds GPU passes when gpuTriple has a value. "gpu-chip" and
+ /// "gpu-features" are forwarded to kernel serialization and are only
+ /// meaningful when a triple is provided.
+ PassOptions::Option<std::string> gpuTriple{*this, "gpu-triple",
+ desc("GPU target triple")};
+ PassOptions::Option<std::string> gpuChip{*this, "gpu-chip",
+ desc("GPU target architecture")};
+ PassOptions::Option<std::string> gpuFeatures{*this, "gpu-features",
+ desc("GPU target features")};
+
/// Projects out the options for `createSparsificationPass`.
SparsificationOptions sparsificationOptions() const {
return SparsificationOptions(parallelization, enableIndexReduction);
#include "mlir/Dialect/SparseTensor/Pipelines/Passes.h"
+#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/Passes.h"
#include "mlir/Dialect/Arith/Transforms/Passes.h"
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
pm.addNestedPass<func::FuncOp>(
mlir::bufferization::createFinalizingBufferizePass());
+
+ // GPU code generation.
+ // Only performed when the user supplied a GPU target triple; without it the
+ // pipeline stays purely CPU-based.
+ const bool gpuCodegen = options.gpuTriple.hasValue();
+ if (gpuCodegen) {
+ // Rewrite sparse kernels into GPU code (presumably outlined into
+ // gpu.module ops, given the nested passes below — confirm with the pass).
+ pm.addPass(createSparseGPUCodegenPass());
+ // Debug info is dropped inside the GPU modules before lowering.
+ pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
+ // Lower structured control flow, then GPU ops, down to NVVM inside each
+ // GPU module.
+ pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(createLowerGpuOpsToNVVMOpsPass());
+ }
+
// TODO(springerm): Add sparse support to the BufferDeallocation pass and add
// it to this pipeline.
pm.addNestedPass<func::FuncOp>(createConvertLinalgToLoopsPass());
pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions()));
pm.addPass(createFinalizeMemRefToLLVMConversionPass());
pm.addNestedPass<func::FuncOp>(createConvertComplexToStandardPass());
- pm.addNestedPass<mlir::func::FuncOp>(mlir::arith::createArithExpandOpsPass());
+ pm.addNestedPass<func::FuncOp>(arith::createArithExpandOpsPass());
pm.addNestedPass<func::FuncOp>(createConvertMathToLLVMPass());
pm.addPass(createConvertMathToLibmPass());
pm.addPass(createConvertComplexToLibmPass());
pm.addPass(createConvertComplexToLLVMPass());
pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions()));
pm.addPass(createConvertFuncToLLVMPass());
+
+ // Finalize GPU code generation.
+ if (gpuCodegen) {
+#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
+ // Serialize each GPU module to a cubin using the user-provided
+ // triple/chip/features. MLIR_GPU_TO_CUBIN_PASS_ENABLE is only defined by
+ // the build when CUDA is available; otherwise kernels are lowered but not
+ // serialized.
+ pm.addNestedPass<gpu::GPUModuleOp>(createGpuSerializeToCubinPass(
+ options.gpuTriple, options.gpuChip, options.gpuFeatures));
+#endif
+ // Convert host-side GPU launch/runtime ops to LLVM runtime calls.
+ pm.addPass(createGpuToLLVMConversionPass());
+ }
+
pm.addPass(createReconcileUnrealizedCastsPass());
}
srcs = glob(["lib/Dialect/SparseTensor/Pipelines/*.cpp"]),
hdrs = ["include/mlir/Dialect/SparseTensor/Pipelines/Passes.h"],
includes = ["include"],
+ local_defines = if_cuda_available(["MLIR_GPU_TO_CUBIN_PASS_ENABLE"]),
deps = [
":ArithTransforms",
":BufferizationTransforms",
":ConversionPasses",
":FuncDialect",
":FuncTransforms",
+ ":GPUDialect",
+ ":GPUToNVVMTransforms",
+ ":GPUTransforms",
":LinalgTransforms",
":MemRefTransforms",
+ ":NVVMDialect",
":Pass",
+ ":SerializeToCubin",
":SparseTensorDialect",
":SparseTensorTransforms",
":TensorTransforms",