a register allocator
created spill
location.
+ ".kind" string The kind of the kernel
+ with the following
+ values:
+
+ "normal"
+ Regular kernels.
+
+ "init"
+ These kernels must be
+ invoked after loading
+ the containing code
+ object and must
+ complete before any
+ normal and fini
+ kernels in the same
+ code object are
+ invoked.
+
+ "fini"
+ These kernels must be
+ invoked before
+ unloading the
+ containing code object
+ and after all init and
+ normal kernels in the
+ same code object have
+ been invoked and
+ completed.
+
+ If omitted, "normal" is
+ assumed.
=================================== ============== ========= ================================
..
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
extern char &AMDGPUFixFunctionBitcastsID;
+ModulePass *createAMDGPUCtorDtorLoweringPass();
+void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &);
+extern char &AMDGPUCtorDtorLoweringID;
+
FunctionPass *createAMDGPULowerKernelArgumentsPass();
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
extern char &AMDGPULowerKernelArgumentsID;
--- /dev/null
+//===-- AMDGPUCtorDtorLowering.cpp - Handle global ctors and dtors --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
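+/// This pass creates a unified init kernel and a unified fini kernel that call
+/// the functions listed in llvm.global_ctors and llvm.global_dtors, and marks
+/// them with the attributes used to emit the required kernel metadata.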
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-lower-ctor-dtor"
+
+namespace {
+/// Module pass that lowers the llvm.global_ctors and llvm.global_dtors arrays
+/// into two kernels, "amdgcn.device.init" and "amdgcn.device.fini", each of
+/// which calls the functions listed in the corresponding array.
+class AMDGPUCtorDtorLowering final : public ModulePass {
+  bool runOnModule(Module &M) override;
+
+public:
+  /// Creates an empty kernel (a single block holding only a `ret void`) that
+  /// will host the calls to the constructors or destructors.
+  ///
+  /// \param M      Module the kernel is inserted into.
+  /// \param IsCtor True to create "amdgcn.device.init" tagged with the
+  ///               "device-init" attribute; false to create
+  ///               "amdgcn.device.fini" tagged with "device-fini".
+  /// \returns The newly created kernel function.
+  Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
+    StringRef InitOrFiniKernelName = "amdgcn.device.init";
+    if (!IsCtor)
+      InitOrFiniKernelName = "amdgcn.device.fini";
+
+    Function *InitOrFiniKernel = Function::createWithDefaultAttr(
+        FunctionType::get(Type::getVoidTy(M.getContext()), false),
+        GlobalValue::InternalLinkage, 0, InitOrFiniKernelName, &M);
+    BasicBlock *InitOrFiniKernelBB =
+        BasicBlock::Create(M.getContext(), "", InitOrFiniKernel);
+    ReturnInst::Create(M.getContext(), InitOrFiniKernelBB);
+
+    InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL);
+    // The attribute is what later identifies the kernel as init or fini.
+    if (IsCtor)
+      InitOrFiniKernel->addFnAttr("device-init");
+    else
+      InitOrFiniKernel->addFnAttr("device-fini");
+    return InitOrFiniKernel;
+  }
+
+  /// Builds the init or fini kernel from the given ctor/dtor list.
+  ///
+  /// \param M      Module being transformed.
+  /// \param GV     The llvm.global_ctors / llvm.global_dtors variable; may be
+  ///               null when the module has no such list.
+  /// \param IsCtor Selects between the init (true) and fini (false) kernel.
+  /// \returns True if a kernel was created, i.e. the module was modified.
+  bool createInitOrFiniKernel(Module &M, GlobalVariable *GV, bool IsCtor) {
+    // A missing list or a declaration without an initializer has nothing to
+    // lower.
+    if (!GV || !GV->hasInitializer())
+      return false;
+    // An empty list may be spelled as zeroinitializer, which is not a
+    // ConstantArray; use dyn_cast so that form is skipped instead of asserting.
+    auto *GA = dyn_cast<ConstantArray>(GV->getInitializer());
+    if (!GA || GA->getNumOperands() == 0)
+      return false;
+
+    Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
+    // Insert every call in front of the kernel's `ret void`.
+    IRBuilder<> IRB(InitOrFiniKernel->getEntryBlock().getTerminator());
+    for (Value *V : GA->operands()) {
+      auto *CS = cast<ConstantStruct>(V);
+      // Operand 1 of each entry is the function to call; entries whose
+      // operand is not a Function are ignored.
+      if (Function *F = dyn_cast<Function>(CS->getOperand(1))) {
+        FunctionCallee Ctor =
+            M.getOrInsertFunction(F->getName(), IRB.getVoidTy());
+        IRB.CreateCall(Ctor);
+      }
+    }
+    // Keep the internal-linkage kernel referenced through the used list.
+    appendToUsed(M, {InitOrFiniKernel});
+    return true;
+  }
+
+  static char ID;
+  AMDGPUCtorDtorLowering() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char AMDGPUCtorDtorLowering::ID = 0;
+char &llvm::AMDGPUCtorDtorLoweringID = AMDGPUCtorDtorLowering::ID;
+INITIALIZE_PASS(AMDGPUCtorDtorLowering, DEBUG_TYPE,
+ "Lower ctors and dtors for AMDGPU", false, false)
+
+/// Returns a newly allocated instance of the AMDGPU ctor/dtor lowering pass.
+ModulePass *llvm::createAMDGPUCtorDtorLoweringPass() {
+ return new AMDGPUCtorDtorLowering();
+}
+
+/// Lowers the module's constructor list and then its destructor list into the
+/// init and fini kernels. Returns true if either kernel was created.
+bool AMDGPUCtorDtorLowering::runOnModule(Module &M) {
+  GlobalVariable *CtorList = M.getGlobalVariable("llvm.global_ctors");
+  const bool CreatedInit = createInitOrFiniKernel(M, CtorList, /*IsCtor =*/true);
+
+  GlobalVariable *DtorList = M.getGlobalVariable("llvm.global_dtors");
+  const bool CreatedFini =
+      createInitOrFiniKernel(M, DtorList, /*IsCtor =*/false);
+
+  // Both lists are always processed above; only the results are combined here.
+  return CreatedInit || CreatedFini;
+}
Func.getFnAttribute("runtime-handle").getValueAsString().str(),
/*Copy=*/true);
}
+ if (Func.hasFnAttribute("device-init"))
+ Kern[".kind"] = Kern.getDocument()->getNode("init");
+ else if (Func.hasFnAttribute("device-fini"))
+ Kern[".kind"] = Kern.getDocument()->getNode("fini");
}
void MetadataStreamerV3::emitKernelArgs(const Function &Func,
initializeSIOptimizeVGPRLiveRangePass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
initializeAMDGPUFixFunctionBitcastsPass(*PR);
+ initializeAMDGPUCtorDtorLoweringPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAttributorPass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
disablePass(&PatchableFunctionID);
addPass(createAMDGPUPrintfRuntimeBinding());
+ addPass(createAMDGPUCtorDtorLoweringPass());
// This must occur before inlining, as the inliner will not look through
// bitcast calls.
AMDGPUCodeGenPrepare.cpp
AMDGPUExportClustering.cpp
AMDGPUFixFunctionBitcasts.cpp
+ AMDGPUCtorDtorLowering.cpp
AMDGPUFrameLowering.cpp
AMDGPUHSAMetadataStreamer.cpp
AMDGPUInstCombineIntrinsic.cpp
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+@llvm.global_ctors = appending addrspace(1) global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @foo, i8* null }, { i32, void ()*, i8* } { i32 1, void ()* @foo.5, i8* null }]
+
+define internal void @foo() {
+ ret void
+
+}
+
+define internal void @foo.5() {
+ ret void
+
+}
+
+; CHECK: ---
+; CHECK: .kind: init
+; CHECK: .name: amdgcn.device.init
+
+@llvm.global_dtors = appending addrspace(1) global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @bar, i8* null }, { i32, void ()*, i8* } { i32 1, void ()* @bar.5, i8* null }]
+
+define internal void @bar() {
+ ret void
+
+}
+
+define internal void @bar.5() {
+ ret void
+
+}
+
+; CHECK: .kind: fini
+; CHECK: .name: amdgcn.device.fini
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS
; GCN-O0-NEXT: AMDGPU Printf lowering
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Dominator Tree Construction
+; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O0-NEXT: Fix function bitcasts for AMDGPU
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Early propagate attributes from kernels to functions
; GCN-O1-NEXT: AMDGPU Printf lowering
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Dominator Tree Construction
+; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O1-NEXT: Fix function bitcasts for AMDGPU
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Early propagate attributes from kernels to functions
; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
+; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O1-OPTS-NEXT: Fix function bitcasts for AMDGPU
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Early propagate attributes from kernels to functions
; GCN-O2-NEXT: AMDGPU Printf lowering
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Dominator Tree Construction
+; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O2-NEXT: Fix function bitcasts for AMDGPU
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Early propagate attributes from kernels to functions
; GCN-O3-NEXT: AMDGPU Printf lowering
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Dominator Tree Construction
+; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU
; GCN-O3-NEXT: Fix function bitcasts for AMDGPU
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Early propagate attributes from kernels to functions
--- /dev/null
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-ctor-dtor < %s | FileCheck %s
+
+@llvm.global_ctors = appending addrspace(1) global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @foo, i8* null }]
+@llvm.global_dtors = appending addrspace(1) global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @bar, i8* null }]
+
+; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
+; CHECK-NEXT: call void @foo
+
+; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1
+; CHECK-NEXT: call void @bar
+
+define internal void @foo() {
+ ret void
+}
+
+define internal void @bar() {
+ ret void
+}
+
+; CHECK: attributes #0 = { "device-init" }
+; CHECK: attributes #1 = { "device-fini" }
--- /dev/null
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-ctor-dtor < %s | FileCheck %s
+
+@llvm.global_ctors = appending addrspace(1) global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @foo, i8* null }, { i32, void ()*, i8* } { i32 1, void ()* @foo.5, i8* null }]
+@llvm.global_dtors = appending addrspace(1) global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @bar, i8* null }, { i32, void ()*, i8* } { i32 1, void ()* @bar.5, i8* null }]
+
+; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.init() #0
+; CHECK-NEXT: call void @foo
+; CHECK-NEXT: call void @foo.5
+
+; CHECK-LABEL: amdgpu_kernel void @amdgcn.device.fini() #1
+; CHECK-NEXT: call void @bar
+; CHECK-NEXT: call void @bar.5
+
+define internal void @foo() {
+ ret void
+}
+
+define internal void @bar() {
+ ret void
+}
+
+define internal void @foo.5() {
+ ret void
+}
+
+define internal void @bar.5() {
+ ret void
+}
+
+; CHECK: attributes #0 = { "device-init" }
+; CHECK: attributes #1 = { "device-fini" }
"AMDGPUCodeGenPrepare.cpp",
"AMDGPUExportClustering.cpp",
"AMDGPUFixFunctionBitcasts.cpp",
+ "AMDGPUCtorDtorLowering.cpp",
"AMDGPUFrameLowering.cpp",
"AMDGPUGlobalISelUtils.cpp",
"AMDGPUHSAMetadataStreamer.cpp",