[mlir][sparse][gpu] sparse GPU code generator pipeline setup
author Aart Bik <ajcbik@google.com>
Wed, 5 Apr 2023 19:57:23 +0000 (12:57 -0700)
committer Aart Bik <ajcbik@google.com>
Wed, 5 Apr 2023 22:12:54 +0000 (15:12 -0700)
Reviewed By: Peiming

Differential Revision: https://reviews.llvm.org/D147571

mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h
mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

index d9afcae..8da020c 100644 (file)
@@ -122,6 +122,14 @@ struct SparseCompilerOptions
            "dialect"),
       init(false)};
 
+  /// These options are used to enable GPU code generation.
+  PassOptions::Option<std::string> gpuTriple{*this, "gpu-triple",
+                                             desc("GPU target triple")};
+  PassOptions::Option<std::string> gpuChip{*this, "gpu-chip",
+                                           desc("GPU target architecture")};
+  PassOptions::Option<std::string> gpuFeatures{*this, "gpu-features",
+                                               desc("GPU target features")};
+
   /// Projects out the options for `createSparsificationPass`.
   SparsificationOptions sparsificationOptions() const {
     return SparsificationOptions(parallelization, enableIndexReduction);
index 87daf76..234a0d8 100644 (file)
@@ -12,6 +12,8 @@ add_mlir_dialect_library(MLIRSparseTensorPipelines
   MLIRComplexToLibm
   MLIRComplexToStandard
   MLIRFuncTransforms
+  MLIRGPUToNVVMTransforms
+  MLIRGPUTransforms
   MLIRLinalgTransforms
   MLIRMathToLibm
   MLIRMathToLLVM
index 47c1601..a2fa480 100644 (file)
@@ -8,12 +8,16 @@
 
 #include "mlir/Dialect/SparseTensor/Pipelines/Passes.h"
 
+#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
 #include "mlir/Conversion/Passes.h"
 #include "mlir/Dialect/Arith/Transforms/Passes.h"
 #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
 #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
@@ -65,6 +69,16 @@ void mlir::sparse_tensor::buildSparseCompiler(
   pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
   pm.addNestedPass<func::FuncOp>(
       mlir::bufferization::createFinalizingBufferizePass());
+
+  // GPU code generation.
+  const bool gpuCodegen = options.gpuTriple.hasValue();
+  if (gpuCodegen) {
+    pm.addPass(createSparseGPUCodegenPass());
+    pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
+    pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
+    pm.addNestedPass<gpu::GPUModuleOp>(createLowerGpuOpsToNVVMOpsPass());
+  }
+
   // TODO(springerm): Add sparse support to the BufferDeallocation pass and add
   // it to this pipeline.
   pm.addNestedPass<func::FuncOp>(createConvertLinalgToLoopsPass());
@@ -75,7 +89,7 @@ void mlir::sparse_tensor::buildSparseCompiler(
   pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions()));
   pm.addPass(createFinalizeMemRefToLLVMConversionPass());
   pm.addNestedPass<func::FuncOp>(createConvertComplexToStandardPass());
-  pm.addNestedPass<mlir::func::FuncOp>(mlir::arith::createArithExpandOpsPass());
+  pm.addNestedPass<func::FuncOp>(arith::createArithExpandOpsPass());
   pm.addNestedPass<func::FuncOp>(createConvertMathToLLVMPass());
   pm.addPass(createConvertMathToLibmPass());
   pm.addPass(createConvertComplexToLibmPass());
@@ -84,6 +98,16 @@ void mlir::sparse_tensor::buildSparseCompiler(
   pm.addPass(createConvertComplexToLLVMPass());
   pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions()));
   pm.addPass(createConvertFuncToLLVMPass());
+
+  // Finalize GPU code generation.
+  if (gpuCodegen) {
+#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
+    pm.addNestedPass<gpu::GPUModuleOp>(createGpuSerializeToCubinPass(
+        options.gpuTriple, options.gpuChip, options.gpuFeatures));
+#endif
+    pm.addPass(createGpuToLLVMConversionPass());
+  }
+
   pm.addPass(createReconcileUnrealizedCastsPass());
 }
 
index 7cc26bf..711a51d 100644 (file)
@@ -2372,15 +2372,21 @@ cc_library(
     srcs = glob(["lib/Dialect/SparseTensor/Pipelines/*.cpp"]),
     hdrs = ["include/mlir/Dialect/SparseTensor/Pipelines/Passes.h"],
     includes = ["include"],
+    local_defines = if_cuda_available(["MLIR_GPU_TO_CUBIN_PASS_ENABLE"]),
     deps = [
         ":ArithTransforms",
         ":BufferizationTransforms",
         ":ConversionPasses",
         ":FuncDialect",
         ":FuncTransforms",
+        ":GPUDialect",
+        ":GPUToNVVMTransforms",
+        ":GPUTransforms",
         ":LinalgTransforms",
         ":MemRefTransforms",
+        ":NVVMDialect",
         ":Pass",
+        ":SerializeToCubin",
         ":SparseTensorDialect",
         ":SparseTensorTransforms",
         ":TensorTransforms",