From: Tom Stellard
Date: Tue, 6 Sep 2016 20:00:26 +0000 (+0000)
Subject: AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies
X-Git-Tag: llvmorg-4.0.0-rc1~10525
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2add8a114054a7ae3acdea286d3b6542f3be8838;p=platform%2Fupstream%2Fllvm.git

AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies

Summary:
I put this code here because I want to re-use it in a few other places.
This supersedes some of the immediate folding code we have in
SIFoldOperands. I think the peephole optimizer is probably a better
place for folding immediates into copies, since it does some register
coalescing at the same time.

This will also make it easier to transition SIFoldOperands into a
smarter pass, where it looks at all uses of an instruction at once to
determine the optimal way to fold operands. Right now, the pass just
considers one operand at a time.

Reviewers: arsenm

Subscribers: wdng, nhaehnle, arsenm, llvm-commits, kzhuravl

Differential Revision: https://reviews.llvm.org/D23402

llvm-svn: 280744
---

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 56daea6..58747af 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1184,14 +1184,39 @@ static void removeModOperands(MachineInstr &MI) {
     MI.RemoveOperand(Src0ModIdx);
 }
 
-// TODO: Maybe this should be removed this and custom fold everything in
-// SIFoldOperands?
 bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                 unsigned Reg, MachineRegisterInfo *MRI) const {
   if (!MRI->hasOneNonDBGUse(Reg))
     return false;
 
   unsigned Opc = UseMI.getOpcode();
+  if (Opc == AMDGPU::COPY) {
+    bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
+    switch (DefMI.getOpcode()) {
+    default:
+      return false;
+    case AMDGPU::S_MOV_B64:
+      // TODO: We could fold 64-bit immediates, but this get compilicated
+      // when there are sub-registers.
+      return false;
+
+    case AMDGPU::V_MOV_B32_e32:
+    case AMDGPU::S_MOV_B32:
+      break;
+    }
+    unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
+    const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
+    assert(ImmOp);
+    // FIXME: We could handle FrameIndex values here.
+    if (!ImmOp->isImm()) {
+      return false;
+    }
+    UseMI.setDesc(get(NewOpc));
+    UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
+    UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
+    return true;
+  }
+
   if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
     // Don't fold if we are using source modifiers. The new VOP2 instructions
     // don't have them.
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 14d6daa..460bb4d 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -25,6 +25,7 @@ class SOP1_Pseudo
 def S_SETREG_B32_vi : SOPK_Real_vi <0x12, S_SETREG_B32>;
 //def S_GETREG_REGRD_B32_vi : SOPK_Real_vi <0x13, S_GETREG_REGRD_B32>; // see pseudo for comments
 def S_SETREG_IMM32_B32_vi : SOPK_Real64<0x14, S_SETREG_IMM32_B32>,
-                            Select_vi;
\ No newline at end of file
+                            Select_vi;
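
For context, here is a rough sketch (not part of the patch) of how a
peephole-style caller might exercise the new COPY-folding path. The
driver below and its variable names are hypothetical; only the
FoldImmediate() signature and the hasOneNonDBGUse() check come from the
code above.

  // Hypothetical caller: DefMI is a 32-bit move-immediate (S_MOV_B32 or
  // V_MOV_B32_e32) and Reg is the register it defines.  If Reg has a
  // single non-debug user and that user is a COPY, FoldImmediate()
  // rewrites the COPY in place into the matching move-immediate
  // (V_MOV_B32_e32 for a VGPR destination, S_MOV_B32 for an SGPR one),
  // after which the now-dead DefMI can be erased by the caller.
  unsigned Reg = DefMI.getOperand(0).getReg();
  if (MRI->hasOneNonDBGUse(Reg)) {
    MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
    if (TII->FoldImmediate(UseMI, DefMI, Reg, MRI))
      DefMI.eraseFromParent();
  }

This is essentially the shape of the def-use walk the generic peephole
optimizer already performs, which is the commit message's argument for
doing this fold there rather than only in SIFoldOperands.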