bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
auto &C = M.getContext();
bool Changed = false;
+
+ // ptr kernel_object, i32 private_segment_size, i32 group_segment_size
+ StructType *HandleTy = nullptr;
+
for (auto &F : M.functions()) {
if (F.hasFnAttribute("enqueued-block")) {
if (!F.hasName()) {
}
LLVM_DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n');
auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
- auto T = ArrayType::get(Type::getInt64Ty(C), 2);
+ if (!HandleTy) {
+ Type *Int32 = Type::getInt32Ty(C);
+ HandleTy = StructType::create(
+ C, {Type::getInt8Ty(C)->getPointerTo(0), Int32, Int32},
+ "block.runtime.handle.t");
+ }
+
auto *GV = new GlobalVariable(
- M, T,
+ M, HandleTy,
/*isConstant=*/false, GlobalValue::ExternalLinkage,
- /*Initializer=*/Constant::getNullValue(T), RuntimeHandle,
+ /*Initializer=*/Constant::getNullValue(HandleTy), RuntimeHandle,
/*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
AMDGPUAS::GLOBAL_ADDRESS,
/*isExternallyInitialized=*/false);
attributes #0 = { "enqueued-block" }
;.
; CHECK: @[[KERNEL_ADDRESS_USER:[a-zA-Z0-9_$"\\.-]+]] = global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @block_has_used_kernel_address.runtime_handle to ptr)]
-; CHECK: @[[__TEST_BLOCK_INVOKE_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__TEST_BLOCK_INVOKE_2_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[BLOCK_HAS_USED_KERNEL_ADDRESS_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_1_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
+; CHECK: @[[__TEST_BLOCK_INVOKE_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__TEST_BLOCK_INVOKE_2_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[BLOCK_HAS_USED_KERNEL_ADDRESS_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_1_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
;.
; CHECK-LABEL: define {{[^@]+}}@non_caller
; CHECK-SAME: (ptr addrspace(1) [[A:%.*]], i8 [[B:%.*]], ptr addrspace(1) [[C:%.*]], i64 [[D:%.*]]) {