From: Matt Arsenault Date: Fri, 22 Jul 2016 17:01:30 +0000 (+0000) Subject: AMDGPU: Add HSA dispatch id intrinsic X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8d718dcfdae7c4ce7fae4ed51ac8a931142dc89d;p=platform%2Fupstream%2Fllvm.git AMDGPU: Add HSA dispatch id intrinsic llvm-svn: 276437 --- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 387335c..b8a7c25 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -70,10 +70,42 @@ def int_r600_recipsqrt_clamped : Intrinsic< let TargetPrefix = "amdgcn" in { +//===----------------------------------------------------------------------===// +// ABI Special Intrinsics +//===----------------------------------------------------------------------===// + defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz; defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named <"__builtin_amdgcn_workgroup_id">; +def int_amdgcn_dispatch_ptr : + GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; + +def int_amdgcn_queue_ptr : + GCCBuiltin<"__builtin_amdgcn_queue_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; + +def int_amdgcn_kernarg_segment_ptr : + GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; + +def int_amdgcn_implicitarg_ptr : + GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; + +def int_amdgcn_groupstaticsize : + GCCBuiltin<"__builtin_amdgcn_groupstaticsize">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + +def int_amdgcn_dispatch_id : + GCCBuiltin<"__builtin_amdgcn_dispatch_id">, + Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + +//===----------------------------------------------------------------------===// +// Instruction Intrinsics +//===----------------------------------------------------------------------===// + def 
int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; @@ -331,26 +363,6 @@ def int_amdgcn_s_getreg : GCCBuiltin<"__builtin_amdgcn_s_getreg">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>; -def int_amdgcn_groupstaticsize : - GCCBuiltin<"__builtin_amdgcn_groupstaticsize">, - Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; - -def int_amdgcn_dispatch_ptr : - GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, - Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; - -def int_amdgcn_queue_ptr : - GCCBuiltin<"__builtin_amdgcn_queue_ptr">, - Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; - -def int_amdgcn_kernarg_segment_ptr : - GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, - Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; - -def int_amdgcn_implicitarg_ptr : - GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">, - Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; - // __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0> def int_amdgcn_interp_p1 : GCCBuiltin<"__builtin_amdgcn_interp_p1">, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 0910b28..5e85221 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -188,7 +188,8 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { static const StringRef HSAIntrinsicToAttr[][2] = { { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, - { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" } + { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }, + { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" } }; // TODO: We should not add the attributes if the known compile time workgroup diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4beae3d..5033374 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -720,6 +720,12 @@ SDValue 
SITargetLowering::LowerFormalArguments( CCInfo.AllocateReg(InputPtrReg); } + if (Info->hasDispatchID()) { + unsigned DispatchIDReg = Info->addDispatchID(*TRI); + MF.addLiveIn(DispatchIDReg, &AMDGPU::SReg_64RegClass); + CCInfo.AllocateReg(DispatchIDReg); + } + if (Info->hasFlatScratchInit()) { unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI); MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SReg_64RegClass); @@ -1975,6 +1981,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); } + case Intrinsic::amdgcn_dispatch_id: { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_ID); + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); + } case Intrinsic::amdgcn_rcp: return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1)); case Intrinsic::amdgcn_rsq: diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 4d12a1e..450fa5d 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -68,8 +68,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), - DispatchID(false), KernargSegmentPtr(false), + DispatchID(false), FlatScratchInit(false), GridWorkgroupCountX(false), GridWorkgroupCountY(false), @@ -127,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (F->hasFnAttribute("amdgpu-queue-ptr")) QueuePtr = true; + + if (F->hasFnAttribute("amdgpu-dispatch-id")) + DispatchID = true; } // We don't need to worry about accessing spills with flat instructions. 
@@ -174,6 +177,13 @@ unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) return KernargSegmentPtrUserSGPR; } +unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) { + DispatchIDUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return DispatchIDUserSGPR; +} + unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { FlatScratchInitUserSGPR = TRI.getMatchingSuperReg( getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index f5bd636..2eec9bf 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -92,8 +92,8 @@ private: bool PrivateSegmentBuffer : 1; bool DispatchPtr : 1; bool QueuePtr : 1; - bool DispatchID : 1; bool KernargSegmentPtr : 1; + bool DispatchID : 1; bool FlatScratchInit : 1; bool GridWorkgroupCountX : 1; bool GridWorkgroupCountY : 1; @@ -143,6 +143,7 @@ public: unsigned addDispatchPtr(const SIRegisterInfo &TRI); unsigned addQueuePtr(const SIRegisterInfo &TRI); unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); + unsigned addDispatchID(const SIRegisterInfo &TRI); unsigned addFlatScratchInit(const SIRegisterInfo &TRI); // Add system SGPRs. 
@@ -192,14 +193,14 @@ public: return QueuePtr; } - bool hasDispatchID() const { - return DispatchID; - } - bool hasKernargSegmentPtr() const { return KernargSegmentPtr; } + bool hasDispatchID() const { + return DispatchID; + } + bool hasFlatScratchInit() const { return FlatScratchInit; } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 0dd88ee..f5c3c62 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -931,7 +931,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, assert(MFI->hasKernargSegmentPtr()); return MFI->KernargSegmentPtrUserSGPR; case SIRegisterInfo::DISPATCH_ID: - llvm_unreachable("unimplemented"); + assert(MFI->hasDispatchID()); + return MFI->DispatchIDUserSGPR; case SIRegisterInfo::FLAT_SCRATCH_INIT: assert(MFI->hasFlatScratchInit()); return MFI->FlatScratchInitUserSGPR; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll new file mode 100644 index 0000000..6c09aa5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i64 @llvm.amdgcn.dispatch.id() #1 + +; GCN-LABEL: {{^}}dispatch_id: +; GCN: .amd_kernel_code_t +; GCN: enable_sgpr_dispatch_id = 1 + +; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6 +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7 +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @dispatch_id(i64 addrspace(1)* %out) #0 { + %tmp0 = call i64 @llvm.amdgcn.dispatch.id() + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone }