[mlir][sparse][gpu] generate single module, unique kernel names

author Aart Bik <ajcbik@google.com>

Sat, 15 Apr 2023 01:30:29 +0000 (18:30 -0700)

committer Aart Bik <ajcbik@google.com>

Sun, 16 Apr 2023 00:25:36 +0000 (17:25 -0700)
author Aart Bik <ajcbik@google.com>
Sat, 15 Apr 2023 01:30:29 +0000 (18:30 -0700)
committer Aart Bik <ajcbik@google.com>
Sun, 16 Apr 2023 00:25:36 +0000 (17:25 -0700)
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp

index 28b5f72..96346d9 100644 (file)
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
@@ -40,24 +40,36 @@ static void markAsGPUContainer(ModuleOp topModule) {
                       UnitAttr::get(topModule->getContext()));
  }
  
-/// Constructs a new GPU module (for GPU kernels) inside the given top module.
-static gpu::GPUModuleOp genGPUModule(OpBuilder &builder, ModuleOp topModule,
-                                     StringRef name) {
+/// Constructs a new GPU module (for GPU kernels) inside the given top module,
+/// or returns an existing GPU module if one was built previously.
+static gpu::GPUModuleOp genGPUModule(OpBuilder &builder, ModuleOp topModule) {
+  for (auto op : topModule.getBodyRegion().getOps<gpu::GPUModuleOp>())
+    return op; // existing
    markAsGPUContainer(topModule);
    builder.setInsertionPointToStart(&topModule.getBodyRegion().front());
-  return builder.create<gpu::GPUModuleOp>(topModule->getLoc(), name);
+  return builder.create<gpu::GPUModuleOp>(topModule->getLoc(),
+                                          "sparse_kernels");
  }
  
  /// Constructs a new GPU kernel in the given GPU module.
  static gpu::GPUFuncOp genGPUFunc(OpBuilder &builder, gpu::GPUModuleOp gpuModule,
-                                 StringRef name, SmallVectorImpl<Value> &args) {
+                                 SmallVectorImpl<Value> &args) {
+  // Get a unique kernel name. Not very creative,
+  // but we simply try kernel0, kernel1, etc.
+  unsigned kernelNumber = 0;
+  SmallString<16> kernelName;
+  do {
+    kernelName.clear();
+    ("kernel" + Twine(kernelNumber++)).toStringRef(kernelName);
+  } while (gpuModule.lookupSymbol(kernelName));
+  // Then we insert a new kernel with given arguments into the module.
    builder.setInsertionPointToStart(&gpuModule.getBodyRegion().front());
    SmallVector<Type> argsTp;
    for (unsigned i = 0, e = args.size(); i < e; i++)
      argsTp.push_back(args[i].getType());
    FunctionType type = FunctionType::get(gpuModule->getContext(), argsTp, {});
    auto gpuFunc =
-      builder.create<gpu::GPUFuncOp>(gpuModule->getLoc(), name, type);
+      builder.create<gpu::GPUFuncOp>(gpuModule->getLoc(), kernelName, type);
    gpuFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                     builder.getUnitAttr());
    return gpuFunc;
@@ -208,12 +220,9 @@ struct ForallRewriter : public OpRewritePattern<scf::ParallelOp> {
        args.push_back(genHostRegisterMemref(rewriter, loc, b));
      auto saveIp = rewriter.saveInsertionPoint();
      // Set up GPU module and construct GPU function.
-    //
-    // TODO: only generate once, avoid name conflict
-    //
      ModuleOp topModule = forallOp->getParentOfType<ModuleOp>();
-    auto gpuModule = genGPUModule(rewriter, topModule, "sparsekernels");
-    auto gpuFunc = genGPUFunc(rewriter, gpuModule, "kernel", args);
+    auto gpuModule = genGPUModule(rewriter, topModule);
+    auto gpuFunc = genGPUFunc(rewriter, gpuModule, args);
      genGPUCode(rewriter, gpuFunc, forallOp, constants, scalars, buffers);
      // Generate code that launches the kernel.
      rewriter.restoreInsertionPoint(saveIp);
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir

new file mode 100644 (file)

index 0000000..ec7c30e
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir
@@ -0,0 +1,29 @@
+// RUN: mlir-opt %s --linalg-generalize-named-ops \
+// RUN:             --pre-sparsification-rewrite \
+// RUN:             --sparsification="parallelization-strategy=dense-outer-loop" \
+// RUN:             --sparse-gpu-codegen | FileCheck %s
+
+#CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>
+
+//
+// CHECK-LABEL: gpu.module @sparse_kernels
+// CHECK-DAG:   gpu.func @kernel0
+// CHECK-DAG:   gpu.func @kernel1
+//
+// CHECK-LABEL: func.func @matmuls
+// CHECK-DAG:   gpu.launch_func @sparse_kernels::@kernel0 blocks
+// CHECK-DAG:   gpu.launch_func @sparse_kernels::@kernel1 blocks
+//
+func.func @matmuls(%A: tensor<1024x8xf64>,
+                   %B: tensor<8x1024xf64, #CSR>,
+                  %C: tensor<1024x1024xf64, #CSR>) -> tensor<1024x1024xf64> {
+  %Z = arith.constant dense<0.0> : tensor<1024x1024xf64>
+  %T = linalg.matmul
+      ins(%A, %B: tensor<1024x8xf64>, tensor<8x1024xf64, #CSR>)
+      outs(%Z: tensor<1024x1024xf64>) -> tensor<1024x1024xf64>
+  %D = linalg.matmul
+      ins(%T, %C: tensor<1024x1024xf64>, tensor<1024x1024xf64, #CSR>)
+      outs(%Z: tensor<1024x1024xf64>) -> tensor<1024x1024xf64>
+  return %D : tensor<1024x1024xf64>
+}
+
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir

index e42bbb0..92d5941 100644 (file)
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir
@@ -8,7 +8,8 @@
  //
  // Compute matrix matrix C = AB
  //
-// CHECK-LABEL: gpu.func @kernel(
+// CHECK-LABEL: gpu.module @sparse_kernels
+// CHECK-LABEL: gpu.func @kernel0(
  // CHECK-SAME:        %[[VAL_0:.*0]]: index,
  // CHECK-SAME:        %[[VAL_1:.*1]]: index,
  // CHECK-SAME:        %[[VAL_2:.*2]]: memref<?xindex>,
@@ -51,7 +52,7 @@
  // CHECK:       gpu.host_register
  // CHECK:       gpu.host_register
  // CHECK:       gpu.host_register
-// CHECK:       gpu.launch_func  @sparsekernels::@kernel blocks
+// CHECK:       gpu.launch_func @sparse_kernels::@kernel0 blocks
  //
  func.func @matmul(%A: tensor<?x?xf64, #CSR>, %B: tensor<?x?xf64>, %C_in: tensor<?x?xf64>) -> tensor<?x?xf64> {
    %C_out = linalg.matmul
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir

index 96b7f9d..05dfc58 100644 (file)
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir
@@ -8,8 +8,8 @@
  //
  // Compute matrix vector y = Ax
  //
-//
-// CHECK:       gpu.func @kernel(
+// CHECK-LABEL: gpu.module @sparse_kernels
+// CHECK:       gpu.func @kernel0(
  // CHECK-SAME:          %[[VAL_0:.*0]]: index,
  // CHECK-SAME:          %[[VAL_1:.*1]]: memref<?xf64>,
  // CHECK-SAME:          %[[VAL_2:.*2]]: memref<?xindex>,
@@ -48,7 +48,7 @@
  // CHECK:       gpu.host_register
  // CHECK:       gpu.host_register
  // CHECK:       gpu.host_register
-// CHECK:       gpu.launch_func  @sparsekernels::@kernel blocks
+// CHECK:       gpu.launch_func @sparse_kernels::@kernel0 blocks
  //
  func.func @matvec(%A: tensor<?x?xf64, #CSR>, %x: tensor<?xf64>, %y_in: tensor<?xf64>) -> tensor<?xf64> {
    %y_out = linalg.matvec
author	Aart Bik <ajcbik@google.com>
	Sat, 15 Apr 2023 01:30:29 +0000 (18:30 -0700)
committer	Aart Bik <ajcbik@google.com>
	Sun, 16 Apr 2023 00:25:36 +0000 (17:25 -0700)
mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp		patch | blob | history
mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir	[new file with mode: 0644]	patch | blob
mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir		patch | blob | history
mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir		patch | blob | history