From: Tobias Grosser Date: Tue, 19 Jul 2016 07:32:44 +0000 (+0000) Subject: GPGPU: add intrinsic functions to obtain a kernels thread and block ids X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=472f9654c8da636ef5a2bd59ee39082d3fd33985;p=platform%2Fupstream%2Fllvm.git GPGPU: add intrinsic functions to obtain a kernels thread and block ids llvm-svn: 275953 --- diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index fbf9e25..d1bdf55 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -97,6 +97,17 @@ private: /// The GPU program we generate code for. gpu_prog *Prog; + /// Class to free isl_ids. + class IslIdDeleter { + public: + void operator()(__isl_take isl_id *Id) { isl_id_free(Id); }; + }; + + /// A set containing all isl_ids allocated in a GPU kernel. + /// + /// By releasing this set all isl_ids will be freed. + std::set<std::unique_ptr<isl_id, IslIdDeleter>> KernelIDs; + /// Create code for user-defined AST nodes. /// /// These AST nodes can be of type: @@ -137,6 +148,11 @@ private: /// @returns The newly declared function. Function *createKernelFunctionDecl(ppcg_kernel *Kernel); + /// Insert intrinsic functions to obtain thread and block ids. + /// + /// @param The kernel to generate the intrinsic functions for. + void insertKernelIntrinsics(ppcg_kernel *Kernel); + /// Finalize the generation of the kernel function. 
/// /// Free the LLVM-IR module corresponding to the kernel and -- if requested -- @@ -172,10 +188,12 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { assert(Kernel->tree && "Device AST of kernel node is empty"); Instruction &HostInsertPoint = *Builder.GetInsertPoint(); + IslExprBuilder::IDToValueTy HostIDs = IDToValue; createKernelFunction(Kernel); Builder.SetInsertPoint(&HostInsertPoint); + IDToValue = HostIDs; finalizeKernelFunction(); } @@ -222,6 +240,35 @@ Function *GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel) { return FN; } +void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) { + Intrinsic::ID IntrinsicsBID[] = {Intrinsic::nvvm_read_ptx_sreg_ctaid_x, + Intrinsic::nvvm_read_ptx_sreg_ctaid_y}; + + Intrinsic::ID IntrinsicsTID[] = {Intrinsic::nvvm_read_ptx_sreg_tid_x, + Intrinsic::nvvm_read_ptx_sreg_tid_y, + Intrinsic::nvvm_read_ptx_sreg_tid_z}; + + auto addId = [this](__isl_take isl_id *Id, Intrinsic::ID Intr) mutable { + std::string Name = isl_id_get_name(Id); + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *IntrinsicFn = Intrinsic::getDeclaration(M, Intr); + Value *Val = Builder.CreateCall(IntrinsicFn, {}); + Val = Builder.CreateIntCast(Val, Builder.getInt64Ty(), false, Name); + IDToValue[Id] = Val; + KernelIDs.insert(std::unique_ptr<isl_id, IslIdDeleter>(Id)); + }; + + for (int i = 0; i < Kernel->n_grid; ++i) { + isl_id *Id = isl_id_list_get_id(Kernel->block_ids, i); + addId(Id, IntrinsicsBID[i]); + } + + for (int i = 0; i < Kernel->n_block; ++i) { + isl_id *Id = isl_id_list_get_id(Kernel->thread_ids, i); + addId(Id, IntrinsicsTID[i]); + } +} + void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel) { std::string Identifier = "kernel_" + std::to_string(Kernel->id); @@ -236,6 +283,8 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel) { Builder.SetInsertPoint(EntryBlock); Builder.CreateRetVoid(); Builder.SetInsertPoint(EntryBlock, EntryBlock->begin()); + + 
insertKernelIntrinsics(Kernel); } void GPUNodeBuilder::finalizeKernelFunction() { @@ -244,6 +293,7 @@ void GPUNodeBuilder::finalizeKernelFunction() { outs() << *GPUModule << "\n"; GPUModule.release(); + KernelIDs.clear(); } namespace { diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll index 1e977db..5e80c3f 100644 --- a/polly/test/GPGPU/double-parallel-loop.ll +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -10,6 +10,10 @@ ; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \ ; RUN: FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \ +; RUN: -disable-output < %s | \ +; RUN: FileCheck %s -check-prefix=KERNEL-IR + ; REQUIRES: pollyacc ; CHECK: Stmt_bb5 @@ -89,6 +93,19 @@ ; IR: polly.exiting: ; IR-NEXT: br label %polly.merge_new_and_old +; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A) { +; KERNEL-IR-NEXT: entry: +; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64 +; KERNEL-IR-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() +; KERNEL-IR-NEXT: %b1 = zext i32 %1 to i64 +; KERNEL-IR-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +; KERNEL-IR-NEXT: %t0 = zext i32 %2 to i64 +; KERNEL-IR-NEXT: %3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() +; KERNEL-IR-NEXT: %t1 = zext i32 %3 to i64 +; KERNEL-IR-NEXT: ret void +; KERNEL-IR-NEXT: } + ; void double_parallel_loop(float A[][1024]) { ; for (long i = 0; i < 1024; i++) ; for (long j = 0; j < 1024; j++) diff --git a/polly/test/GPGPU/kernel-params-only-some-arrays.ll b/polly/test/GPGPU/kernel-params-only-some-arrays.ll index 3bd3f96..206f72d 100644 --- a/polly/test/GPGPU/kernel-params-only-some-arrays.ll +++ b/polly/test/GPGPU/kernel-params-only-some-arrays.ll @@ -17,6 +17,10 @@ ; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) { ; KERNEL-NEXT: entry: +; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +; KERNEL-NEXT: %b0 = zext i32 
%0 to i64 +; KERNEL-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +; KERNEL-NEXT: %t0 = zext i32 %1 to i64 ; KERNEL-NEXT: ret void ; KERNEL-NEXT: } @@ -27,6 +31,10 @@ ; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) { ; KERNEL-NEXT: entry: +; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +; KERNEL-NEXT: %b0 = zext i32 %0 to i64 +; KERNEL-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +; KERNEL-NEXT: %t0 = zext i32 %1 to i64 ; KERNEL-NEXT: ret void ; KERNEL-NEXT: }