From a062a3ed7fd82c277812d80fb83dc6f05b939a84 Mon Sep 17 00:00:00 2001
From: Denis Khalikov <khalikov.denis@huawei.com>
Date: Thu, 13 Feb 2020 13:29:13 -0500
Subject: [PATCH] [mlir][spirv] Add ConvertGpuLaunchFuncToVulkanCallsPass

Implement a pass to convert gpu.launch_func op into a sequence of
Vulkan runtime calls. The Vulkan runtime API surface is huge so currently we
don't expose separate external functions in IR for each of them, instead we
expose a few external functions to wrapper libraries that manage the
Vulkan runtime.

Differential Revision: https://reviews.llvm.org/D74549
---
 .../GPUToVulkan/ConvertGPUToVulkanPass.h           |  30 +++
 mlir/include/mlir/InitAllPasses.h                  |   4 +
 mlir/lib/Conversion/CMakeLists.txt                 |   1 +
 mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt     |  16 ++
 .../GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp | 278 +++++++++++++++++++++
 .../test/Conversion/GPUToVulkan/invoke-vulkan.mlir |  45 ++++
 mlir/tools/mlir-opt/CMakeLists.txt                 |   1 +
 7 files changed, 375 insertions(+)
 create mode 100644 mlir/include/mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h
 create mode 100644 mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt
 create mode 100644 mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp
 create mode 100644 mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir
diff --git a/mlir/include/mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h b/mlir/include/mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h
new file mode 100644
index 0000000..af2c062
--- /dev/null
+++ b/mlir/include/mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h
@@ -0,0 +1,30 @@
+//===- ConvertGPUToVulkanPass.h - GPU to Vulkan conversion pass -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The file declares a pass to convert GPU dialect ops to Vulkan runtime
+// calls.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_GPUTOVULKAN_CONVERTGPUTOVULKANPASS_H
+#define MLIR_CONVERSION_GPUTOVULKAN_CONVERTGPUTOVULKANPASS_H
+
+#include "mlir/Support/LLVM.h"
+
+#include <memory>
+
+namespace mlir {
+
+class ModuleOp;
+template <typename T> class OpPassBase;
+/// Creates a pass that converts gpu.launch_func ops to Vulkan runtime calls.
+std::unique_ptr<OpPassBase<ModuleOp>>
+createConvertGpuLaunchFuncToVulkanCallsPass();
+
+} // namespace mlir
+#endif // MLIR_CONVERSION_GPUTOVULKAN_CONVERTGPUTOVULKANPASS_H
diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h
index e48af6f..b867e06 100644
--- a/mlir/include/mlir/InitAllPasses.h
+++ b/mlir/include/mlir/InitAllPasses.h
@@ -19,6 +19,7 @@
 #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
 #include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
 #include "mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h"
+#include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h"
 #include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h"
 #include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h"
 #include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h"
@@ -117,6 +118,9 @@ inline void registerAllPasses() {
   createConvertStandardToSPIRVPass();
   createLegalizeStdOpsForSPIRVLoweringPass();
   createLinalgToSPIRVPass();
+
+  // Vulkan
+  createConvertGpuLaunchFuncToVulkanCallsPass();
 }
 
 } // namespace mlir
diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt
index 665ac9b9..4634345 100644
--- a/mlir/lib/Conversion/CMakeLists.txt
+++ b/mlir/lib/Conversion/CMakeLists.txt
@@ -3,6 +3,7 @@ add_subdirectory(GPUToCUDA)
 add_subdirectory(GPUToNVVM)
 add_subdirectory(GPUToROCDL)
 add_subdirectory(GPUToSPIRV)
+add_subdirectory(GPUToVulkan)
 add_subdirectory(LinalgToLLVM)
 add_subdirectory(LinalgToSPIRV)
 add_subdirectory(LoopsToGPU)
diff --git a/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt b/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt
new file mode 100644
index 0000000..491a1ca
--- /dev/null
+++ b/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_llvm_library(MLIRGPUtoVulkanTransforms
+  ConvertLaunchFuncToVulkanCalls.cpp
+  )
+
+target_link_libraries(MLIRGPUtoVulkanTransforms
+  MLIRGPU
+  MLIRIR
+  MLIRLLVMIR
+  MLIRPass
+  MLIRSPIRV
+  MLIRSPIRVSerialization
+  MLIRStandardOps
+  MLIRSupport
+  MLIRTransforms
+  MLIRTranslation
+  )
diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp
new file mode 100644
index 0000000..fda57f4
--- /dev/null
+++ b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp
@@ -0,0 +1,278 @@
+//===- ConvertLaunchFuncToVulkanCalls.cpp - MLIR Vulkan conversion passes -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to convert gpu.launch_func op into a sequence of
+// Vulkan runtime calls. The Vulkan runtime API surface is huge so currently we
+// don't expose separate external functions in IR for each of them, instead we
+// expose a few external functions to wrapper libraries that manage the Vulkan
+// runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h"
+#include "mlir/Dialect/GPU/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/SPIRV/SPIRVOps.h"
+#include "mlir/Dialect/SPIRV/Serialization.h"
+#include "mlir/Dialect/StandardOps/Ops.h"
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/Function.h"
+#include "mlir/IR/Module.h"
+#include "mlir/IR/StandardTypes.h"
+#include "mlir/Pass/Pass.h"
+
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace mlir;
+
+static constexpr const char *kSetBinaryShader = "setBinaryShader";
+static constexpr const char *kSetEntryPoint = "setEntryPoint";
+static constexpr const char *kSetNumWorkGroups = "setNumWorkGroups";
+static constexpr const char *kRunOnVulkan = "runOnVulkan";
+static constexpr const char *kSPIRVBinary = "SPIRV_BIN";
+
+namespace {
+
+/// A pass to convert gpu.launch_func operation into a sequence of Vulkan
+/// runtime calls.
+///
+/// * setBinaryShader      -- sets the binary shader data
+/// * setEntryPoint        -- sets the entry point name
+/// * setNumWorkGroups     -- sets the number of local workgroups
+/// * runOnVulkan          -- runs vulkan runtime
+///
+class GpuLaunchFuncToVulkanCalssPass
+    : public ModulePass<GpuLaunchFuncToVulkanCalssPass> {
+private:
+  LLVM::LLVMDialect *getLLVMDialect() { return llvmDialect; }
+
+  llvm::LLVMContext &getLLVMContext() {
+    return getLLVMDialect()->getLLVMContext();
+  }
+
+  void initializeCachedTypes() {
+    llvmDialect = getContext().getRegisteredDialect<LLVM::LLVMDialect>();
+    llvmVoidType = LLVM::LLVMType::getVoidTy(llvmDialect);
+    llvmPointerType = LLVM::LLVMType::getInt8PtrTy(llvmDialect);
+    llvmInt32Type = LLVM::LLVMType::getInt32Ty(llvmDialect);
+  }
+
+  LLVM::LLVMType getVoidType() { return llvmVoidType; }
+  LLVM::LLVMType getPointerType() { return llvmPointerType; }
+  LLVM::LLVMType getInt32Type() { return llvmInt32Type; }
+
+  /// Creates a SPIR-V binary shader from the given `module` using
+  /// `spirv::serialize` function.
+  LogicalResult createBinaryShader(ModuleOp module,
+                                   std::vector<char> &binaryShader);
+
+  /// Creates an LLVM global string for the given entry point `name`.
+  Value createEntryPointNameConstant(StringRef name, Location loc,
+                                     OpBuilder &builder);
+
+  /// Creates an LLVM constant for each dimension of the local workgroup and
+  /// appends it to the given `numWorkGroups`.
+  LogicalResult createNumWorkGroups(Location loc, OpBuilder &builder,
+                                    mlir::gpu::LaunchFuncOp launchOp,
+                                    SmallVector<Value, 3> &numWorkGroups);
+
+  /// Declares all needed runtime functions.
+  void declareVulkanFunctions(Location loc);
+
+  /// Translates the given `launchOp` to a sequence of Vulkan runtime calls.
+  void translateGpuLaunchCalls(mlir::gpu::LaunchFuncOp launchOp);
+
+public:
+  void runOnModule() override;
+
+private:
+  LLVM::LLVMDialect *llvmDialect;
+  LLVM::LLVMType llvmVoidType;
+  LLVM::LLVMType llvmPointerType;
+  LLVM::LLVMType llvmInt32Type;
+};
+
+} // anonymous namespace
+
+void GpuLaunchFuncToVulkanCalssPass::runOnModule() {
+  initializeCachedTypes();
+  // Rewrite every `gpu.launch_func` found in the module.
+  getModule().walk(
+      [this](mlir::gpu::LaunchFuncOp op) { translateGpuLaunchCalls(op); });
+
+  // Erase all top-level `gpu::GPUModuleOp` and `spirv::ModuleOp` operations.
+  for (auto gpuModule :
+       llvm::make_early_inc_range(getModule().getOps<gpu::GPUModuleOp>()))
+    gpuModule.erase();
+  // Early-increment iteration keeps the loop valid while ops are erased.
+  for (auto spirvModule :
+       llvm::make_early_inc_range(getModule().getOps<spirv::ModuleOp>()))
+    spirvModule.erase();
+}
+
+void GpuLaunchFuncToVulkanCalssPass::declareVulkanFunctions(Location loc) {
+  ModuleOp module = getModule();
+  OpBuilder builder(module.getBody()->getTerminator());
+
+  // Emits a declaration of `name` with type `funcType` in front of the module
+  // terminator, unless the module already holds a symbol with that name.
+  auto declareIfMissing = [&](const char *name, LLVM::LLVMType funcType) {
+    if (!module.lookupSymbol(name))
+      builder.create<LLVM::LLVMFuncOp>(loc, name, funcType);
+  };
+
+  // void setEntryPoint(i8*)
+  declareIfMissing(kSetEntryPoint, LLVM::LLVMType::getFunctionTy(
+                                       getVoidType(), {getPointerType()},
+                                       /*isVarArg=*/false));
+
+  // void setNumWorkGroups(i32, i32, i32)
+  declareIfMissing(
+      kSetNumWorkGroups,
+      LLVM::LLVMType::getFunctionTy(
+          getVoidType(), {getInt32Type(), getInt32Type(), getInt32Type()},
+          /*isVarArg=*/false));
+
+  // void setBinaryShader(i8*, i32)
+  declareIfMissing(kSetBinaryShader,
+                   LLVM::LLVMType::getFunctionTy(
+                       getVoidType(), {getPointerType(), getInt32Type()},
+                       /*isVarArg=*/false));
+
+  // void runOnVulkan()
+  declareIfMissing(kRunOnVulkan,
+                   LLVM::LLVMType::getFunctionTy(getVoidType(), {},
+                                                 /*isVarArg=*/false));
+}
+
+Value GpuLaunchFuncToVulkanCalssPass::createEntryPointNameConstant(
+    StringRef name, Location loc, OpBuilder &builder) {
+  // LLVM::createGlobalString() does not add a C-style terminator on its own,
+  // so the trailing `\0` is made part of the string payload here.
+  std::string terminatedName = name.str();
+  terminatedName.push_back('\0');
+  // The global is named after the kernel: `<name>_spv_entry_point_name`.
+  std::string globalSymbol =
+      std::string(llvm::formatv("{0}_spv_entry_point_name", name));
+  return LLVM::createGlobalString(
+      loc, builder, globalSymbol,
+      StringRef(terminatedName.data(), terminatedName.size()),
+      LLVM::Linkage::Internal, getLLVMDialect());
+}
+
+// Serializes the only `spv.module` op nested in `module` to `binaryShader`.
+LogicalResult GpuLaunchFuncToVulkanCalssPass::createBinaryShader(
+    ModuleOp module, std::vector<char> &binaryShader) {
+  bool done = false;
+  SmallVector<uint32_t, 0> binary;
+  for (auto spirvModule : module.getOps<spirv::ModuleOp>()) {
+    if (done)
+      return spirvModule.emitError("should only contain one 'spv.module' op");
+    done = true;
+    if (failed(spirv::serialize(spirvModule, binary)))
+      return failure();
+  }
+  // Fail loudly rather than hand an empty shader to the runtime.
+  if (!done)
+    return module.emitError("expected an 'spv.module' op to serialize");
+  // Copy the SPIR-V words out as raw bytes without relying on <cstring>.
+  const char *bytes = reinterpret_cast<const char *>(binary.data());
+  binaryShader.assign(bytes, bytes + binary.size() * sizeof(uint32_t));
+  return success();
+}
+
+LogicalResult GpuLaunchFuncToVulkanCalssPass::createNumWorkGroups(
+    Location loc, OpBuilder &builder, mlir::gpu::LaunchFuncOp launchOp,
+    SmallVector<Value, 3> &numWorkGroups) {
+  // The first three operands of `launchOp` are read as the X, Y and Z counts.
+  for (auto index : llvm::seq(0, 3)) {
+    auto numWorkGroupDimConstant = dyn_cast_or_null<ConstantOp>(
+        launchOp.getOperand(index).getDefiningOp());
+    // Only constant operands can be lowered; report why the pass fails.
+    if (!numWorkGroupDimConstant) {
+      launchOp.emitError("expected constant number of workgroups");
+      return failure();
+    }
+    auto numWorkGroupDimValue =
+        numWorkGroupDimConstant.getValue().cast<IntegerAttr>().getInt();
+    numWorkGroups.push_back(builder.create<LLVM::ConstantOp>(
+        loc, getInt32Type(), builder.getI32IntegerAttr(numWorkGroupDimValue)));
+  }
+  return success();
+}
+
+// Replaces `launchOp` with a sequence of Vulkan runtime calls.
+void GpuLaunchFuncToVulkanCalssPass::translateGpuLaunchCalls(
+    mlir::gpu::LaunchFuncOp launchOp) {
+  ModuleOp module = getModule();
+  OpBuilder builder(launchOp);
+  Location loc = launchOp.getLoc();
+
+  // Serialize the `spv.module` held by the enclosing module into binary form.
+  std::vector<char> binary;
+  if (failed(
+          GpuLaunchFuncToVulkanCalssPass::createBinaryShader(module, binary))) {
+    return signalPassFailure();
+  }
+
+  // Create an LLVM global with the SPIR-V binary so its address can be passed
+  // to the runtime. NOTE(review): fixed global name; confirm >1 launch works.
+  Value ptrToSPIRVBinary = LLVM::createGlobalString(
+      loc, builder, kSPIRVBinary, StringRef(binary.data(), binary.size()),
+      LLVM::Linkage::Internal, getLLVMDialect());
+  // Create an LLVM constant for the byte size of the SPIR-V binary shader.
+  Value binarySize = builder.create<LLVM::ConstantOp>(
+      loc, getInt32Type(), builder.getI32IntegerAttr(binary.size()));
+  // Create call to `setBinaryShader` runtime function with the given pointer to
+  // SPIR-V binary and binary size.
+  builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{getVoidType()},
+                               builder.getSymbolRefAttr(kSetBinaryShader),
+                               ArrayRef<Value>{ptrToSPIRVBinary, binarySize});
+
+  // Create an LLVM global with the entry point name.
+  Value entryPointName =
+      createEntryPointNameConstant(launchOp.kernel(), loc, builder);
+  // Create call to `setEntryPoint` runtime function with the given pointer to
+  // entry point name.
+  builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{getVoidType()},
+                               builder.getSymbolRefAttr(kSetEntryPoint),
+                               ArrayRef<Value>{entryPointName});
+
+  // Create the number of local workgroups for each dimension.
+  SmallVector<Value, 3> numWorkGroups;
+  if (failed(createNumWorkGroups(loc, builder, launchOp, numWorkGroups))) {
+    return signalPassFailure();
+  }
+
+  // Create call to `setNumWorkGroups` runtime function with the numbers of
+  // local workgroups.
+  builder.create<LLVM::CallOp>(
+      loc, ArrayRef<Type>{getVoidType()},
+      builder.getSymbolRefAttr(kSetNumWorkGroups),
+      ArrayRef<Value>{numWorkGroups[0], numWorkGroups[1], numWorkGroups[2]});
+
+  // Create call to `runOnVulkan` runtime function.
+  builder.create<LLVM::CallOp>(loc, ArrayRef<Type>{getVoidType()},
+                               builder.getSymbolRefAttr(kRunOnVulkan),
+                               ArrayRef<Value>{});
+
+  // Declare the runtime functions referenced by the calls created above.
+  declareVulkanFunctions(loc);
+
+  launchOp.erase();
+}
+
+std::unique_ptr<mlir::OpPassBase<mlir::ModuleOp>>
+mlir::createConvertGpuLaunchFuncToVulkanCallsPass() {
+  return std::make_unique<GpuLaunchFuncToVulkanCalssPass>();
+}
+
+static PassRegistration<GpuLaunchFuncToVulkanCalssPass>
+    pass("launch-func-to-vulkan",
+         "Convert gpu.launch_func op to Vulkan runtime calls");
diff --git a/mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir b/mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir
new file mode 100644
index 0000000..580c133
--- /dev/null
+++ b/mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir
@@ -0,0 +1,45 @@
+// RUN: mlir-opt %s -launch-func-to-vulkan | FileCheck %s
+
+// CHECK: llvm.mlir.global internal constant @kernel_spv_entry_point_name
+// CHECK: llvm.mlir.global internal constant @SPIRV_BIN
+// CHECK: %[[addressof_SPIRV_BIN:.*]] = llvm.mlir.addressof @SPIRV_BIN
+// CHECK: %[[SPIRV_BIN_ptr:.*]] = llvm.getelementptr %[[addressof_SPIRV_BIN]]
+// CHECK: %[[SPIRV_BIN_size:.*]] = llvm.mlir.constant
+// CHECK: llvm.call @setBinaryShader(%[[SPIRV_BIN_ptr]], %[[SPIRV_BIN_size]]) : (!llvm<"i8*">, !llvm.i32) -> !llvm.void
+// CHECK: %[[addressof_entry_point:.*]] = llvm.mlir.addressof @kernel_spv_entry_point_name
+// CHECK: %[[entry_point_ptr:.*]] = llvm.getelementptr %[[addressof_entry_point]]
+// CHECK: llvm.call @setEntryPoint(%[[entry_point_ptr]]) : (!llvm<"i8*">) -> !llvm.void
+// CHECK: %[[Workgroup_X:.*]] = llvm.mlir.constant
+// CHECK: %[[Workgroup_Y:.*]] = llvm.mlir.constant
+// CHECK: %[[Workgroup_Z:.*]] = llvm.mlir.constant
+// CHECK: llvm.call @setNumWorkGroups(%[[Workgroup_X]], %[[Workgroup_Y]], %[[Workgroup_Z]]) : (!llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.void
+// CHECK: llvm.call @runOnVulkan() : () -> !llvm.void
+
+module attributes {gpu.container_module} {
+  spv.module "Logical" "GLSL450" {
+    spv.globalVariable @kernel_arg_0 bind(0, 0) : !spv.ptr<!spv.struct<f32 [0]>, StorageBuffer>
+    spv.globalVariable @kernel_arg_1 bind(0, 1) : !spv.ptr<!spv.struct<!spv.array<12 x f32 [4]> [0]>, StorageBuffer>
+    spv.func @kernel() "None" attributes {workgroup_attributions = 0 : i64} {
+      %0 = spv._address_of @kernel_arg_1 : !spv.ptr<!spv.struct<!spv.array<12 x f32 [4]> [0]>, StorageBuffer>
+      %1 = spv._address_of @kernel_arg_0 : !spv.ptr<!spv.struct<f32 [0]>, StorageBuffer>
+      %2 = spv.constant 0 : i32
+      %3 = spv.AccessChain %1[%2] : !spv.ptr<!spv.struct<f32 [0]>, StorageBuffer>
+      %4 = spv.Load "StorageBuffer" %3 : f32
+      spv.Return
+    }
+    spv.EntryPoint "GLCompute" @kernel
+    spv.ExecutionMode @kernel "LocalSize", 1, 1, 1
+  } attributes {capabilities = ["Shader"], extensions = ["SPV_KHR_storage_buffer_storage_class"]}
+  gpu.module @kernels {
+    gpu.func @kernel(%arg0: f32, %arg1: memref<12xf32>) kernel {
+      gpu.return
+    }
+  }
+  func @foo() {
+    %0 = "op"() : () -> f32
+    %1 = "op"() : () -> memref<12xf32>
+    %c1 = constant 1 : index
+    "gpu.launch_func"(%c1, %c1, %c1, %c1, %c1, %c1, %0, %1) {kernel = "kernel", kernel_module = @kernels} : (index, index, index, index, index, index, f32, memref<12xf32>) -> ()
+    return
+  }
+}
diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt
index 99a2088..eda3652 100644
--- a/mlir/tools/mlir-opt/CMakeLists.txt
+++ b/mlir/tools/mlir-opt/CMakeLists.txt
@@ -35,6 +35,7 @@ set(LIBS
   MLIRGPUtoNVVMTransforms
   MLIRGPUtoROCDLTransforms
   MLIRGPUtoSPIRVTransforms
+  MLIRGPUtoVulkanTransforms
   MLIRLinalgOps
   MLIRLinalgAnalysis
   MLIRLinalgEDSC
-- 
2.7.4