From c7709e1c29d2fce5c4fc33f06c21899b24e2a618 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 24 Apr 2018 20:51:28 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Add support for amdgpu_ps calling convention Reviewers: arsenm Reviewed By: arsenm Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D45837 llvm-svn: 330767 --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 63 +++++++++++++++++----- .../AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll | 16 ++++++ 2 files changed, 65 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 58e8b68..9cb647c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -139,40 +139,75 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, unsigned NumArgs = F.arg_size(); Function::const_arg_iterator CurOrigArg = F.arg_begin(); const AMDGPUTargetLowering &TLI = *getTLI(); + unsigned PSInputNum = 0; + BitVector Skipped(NumArgs); for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) { EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType()); // We can only hanlde simple value types at the moment. - if (!ValEVT.isSimple()) - return false; - MVT ValVT = ValEVT.getSimpleVT(); ISD::ArgFlagsTy Flags; ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()}; setArgFlags(OrigArg, i + 1, DL, F); Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType())); + + if (F.getCallingConv() == CallingConv::AMDGPU_PS && + !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() && + PSInputNum <= 15) { + if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) { + Skipped.set(i); + ++PSInputNum; + continue; + } + + Info->markPSInputAllocated(PSInputNum); + if (!CurOrigArg->use_empty()) + Info->markPSInputEnabled(PSInputNum); + + ++PSInputNum; + } + CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false); - bool Res = - AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo); - // Fail if we don't know how to handle this type. - if (Res) - return false; + if (ValEVT.isVector()) { + EVT ElemVT = ValEVT.getVectorElementType(); + if (!ValEVT.isSimple()) + return false; + MVT ValVT = ElemVT.getSimpleVT(); + bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, + OrigArg.Flags, CCInfo); + if (!Res) + return false; + } else { + MVT ValVT = ValEVT.getSimpleVT(); + if (!ValEVT.isSimple()) + return false; + bool Res = + AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo); + + // Fail if we don't know how to handle this type. + if (Res) + return false; + } } Function::const_arg_iterator Arg = F.arg_begin(); - if (F.getCallingConv() == CallingConv::AMDGPU_VS) { - for (unsigned i = 0; i != NumArgs; ++i, ++Arg) { - CCValAssign &VA = ArgLocs[i]; - MRI.addLiveIn(VA.getLocReg(), VRegs[i]); + if (F.getCallingConv() == CallingConv::AMDGPU_VS || + F.getCallingConv() == CallingConv::AMDGPU_PS) { + for (unsigned i = 0, OrigArgIdx = 0; + OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) { + if (Skipped.test(OrigArgIdx)) + continue; + CCValAssign &VA = ArgLocs[i++]; + MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]); MIRBuilder.getMBB().addLiveIn(VA.getLocReg()); - MIRBuilder.buildCopy(VRegs[i], VA.getLocReg()); + MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg()); } return true; } - for (unsigned i = 0; i != NumArgs; ++i, ++Arg) { + for (unsigned i = 0; i != ArgLocs.size(); ++i, ++Arg) { // FIXME: We should be getting DebugInfo from the arguments some how. CCValAssign &VA = ArgLocs[i]; lowerParameter(MIRBuilder, Arg->getType(), diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll new file mode 100644 index 0000000..2ead7bd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -stop-after=irtranslator -global-isel %s -o - | FileCheck %s + +; Check that we correctly skip over disabled inputs +; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0 +; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0 +; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]](s32), [[S0]](s32), [[S0]](s32), [[V0]](s32) +define amdgpu_ps void @ps0(float inreg %arg0, float %psinput0, float %psinput1) #1 { +main_body: + call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 + ret void +} + +declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 + +attributes #0 = { nounwind } +attributes #1 = { "InitialPSInputAddr"="0x00002" } -- 2.7.4