From 01c755ff80cbb795f507cf4317b4a4be1a31484d Mon Sep 17 00:00:00 2001 From: Vinayaka Bandishti Date: Mon, 5 Jun 2023 10:32:51 +0530 Subject: [PATCH] Make optimize llvm common to both gpu-to-hsaco/cubin Before serializing, optimizations on llvm were only called on path to hsaco, and not cubin. Define opt-level for `gpu-to-cubin` pass as well, and move call to optimize llvm to a common place. Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D151554 --- mlir/include/mlir/Dialect/GPU/Transforms/Passes.h | 8 ++++-- mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + .../lib/Dialect/GPU/Transforms/SerializeToBlob.cpp | 21 ++++++++++++-- .../Dialect/GPU/Transforms/SerializeToCubin.cpp | 13 ++++++--- .../Dialect/GPU/Transforms/SerializeToHsaco.cpp | 33 ---------------------- 5 files changed, 35 insertions(+), 41 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h index 89a45a4..d24d4d8 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h @@ -110,6 +110,9 @@ protected: ::llvm::cl::desc("Target architecture")}; Option features{*this, "features", ::llvm::cl::desc("Target features")}; + Option optLevel{*this, "opt-level", + llvm::cl::desc("Optimization level for compilation"), + llvm::cl::init(2)}; Option gpuBinaryAnnotation{ *this, "gpu-binary-annotation", llvm::cl::desc("Annotation attribute string for GPU binary"), @@ -130,10 +133,11 @@ void registerGpuSerializeToCubinPass(); void registerGpuSerializeToHsacoPass(); /// Create an instance of the GPU kernel function to CUBIN binary serialization -/// pass. +/// pass with optLevel (default level 2). std::unique_ptr createGpuSerializeToCubinPass(StringRef triple, StringRef chip, - StringRef features); + StringRef features, + int optLevel = 2); /// Create an instance of the GPU kernel function to HSAco binary serialization /// pass. 
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index ca16333..4250e40 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -61,6 +61,7 @@ add_mlir_dialect_library(MLIRGPUTransforms LINK_COMPONENTS Core MC + Target ${NVPTX_LIBS} ${AMDGPU_LIBS} diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp index d82e6ca..97aba90 100644 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/GPU/Transforms/Passes.h" +#include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/Pass/Pass.h" #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" @@ -102,8 +103,24 @@ void gpu::SerializeToBlobPass::runOnOperation() { LogicalResult gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine) { - // TODO: If serializeToCubin ends up defining optimizations, factor them - // into here from SerializeToHsaco + int optLevel = this->optLevel.getValue(); + if (optLevel < 0 || optLevel > 3) + return getOperation().emitError() + << "invalid optimization level " << optLevel; + + targetMachine.setOptLevel(static_cast(optLevel)); + + auto transformer = + makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); + auto error = transformer(&llvmModule); + if (error) { + InFlightDiagnostic mlirError = getOperation()->emitError(); + llvm::handleAllErrors( + std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { + mlirError << "could not optimize LLVM IR: " << ei.message(); + }); + return mlirError; + } return success(); } diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp 
b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp index 44a1402..4df9e6d 100644 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp @@ -49,7 +49,8 @@ public: MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass) SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda", - StringRef chip = "sm_35", StringRef features = "+ptx60"); + StringRef chip = "sm_35", StringRef features = "+ptx60", + int optLevel = 2); StringRef getArgument() const override { return "gpu-to-cubin"; } StringRef getDescription() const override { @@ -72,10 +73,12 @@ static void maybeSetOption(Pass::Option &option, StringRef value) { } SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip, - StringRef features) { + StringRef features, int optLevel) { maybeSetOption(this->triple, triple); maybeSetOption(this->chip, chip); maybeSetOption(this->features, features); + if (this->optLevel.getNumOccurrences() == 0) + this->optLevel.setValue(optLevel); } void SerializeToCubinPass::getDependentDialects( @@ -147,8 +150,10 @@ void mlir::registerGpuSerializeToCubinPass() { std::unique_ptr mlir::createGpuSerializeToCubinPass(StringRef triple, StringRef arch, - StringRef features) { - return std::make_unique(triple, arch, features); + StringRef features, + int optLevel) { + return std::make_unique(triple, arch, features, + optLevel); } #else // MLIR_GPU_TO_CUBIN_PASS_ENABLE diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp index 42cb246..108b8ab 100644 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp @@ -74,11 +74,6 @@ public: } protected: - Option optLevel{ - *this, "opt-level", - llvm::cl::desc("Optimization level for HSACO compilation"), - llvm::cl::init(2)}; - Option rocmPath{*this, "rocm-path", llvm::cl::desc("Path to ROCm install")}; @@ -86,10 +81,6 @@ 
protected: std::unique_ptr translateToLLVMIR(llvm::LLVMContext &llvmContext) override; - /// Adds LLVM optimization passes - LogicalResult optimizeLlvm(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) override; - private: void getDependentDialects(DialectRegistry &registry) const override; @@ -320,30 +311,6 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { return ret; } -LogicalResult -SerializeToHsacoPass::optimizeLlvm(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) { - int optLevel = this->optLevel.getValue(); - if (optLevel < 0 || optLevel > 3) - return getOperation().emitError() - << "Invalid HSA optimization level" << optLevel << "\n"; - - targetMachine.setOptLevel(static_cast(optLevel)); - - auto transformer = - makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); - auto error = transformer(&llvmModule); - if (error) { - InFlightDiagnostic mlirError = getOperation()->emitError(); - llvm::handleAllErrors( - std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { - mlirError << "Could not optimize LLVM IR: " << ei.message() << "\n"; - }); - return mlirError; - } - return success(); -} - std::unique_ptr> SerializeToHsacoPass::assembleIsa(const std::string &isa) { auto loc = getOperation().getLoc(); -- 2.7.4