From: Tom Stellard
Date: Tue, 6 Dec 2016 21:13:30 +0000 (+0000)
Subject: AMDGPU/SI: Don't move copies of immediates to the VALU
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=00cfa747156c8a7bb25be3e6a85272a61b0b15ca;p=platform%2Fupstream%2Fllvm.git

AMDGPU/SI: Don't move copies of immediates to the VALU

Summary:
If we write an immediate to a VGPR and then copy the VGPR to an SGPR,
we can replace the copy with an S_MOV_B32 sgpr, imm, rather than
moving the copy to the VALU.
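For example, in MIR-like notation (the virtual register numbers and the
immediate are illustrative, not taken from an actual dump), a sequence
such as

    %0 = V_MOV_B32_e32 1024, implicit %exec
    %1 = COPY %0                            ; VGPR -> SGPR copy

is now rewritten in place to

    %0 = V_MOV_B32_e32 1024, implicit %exec
    %1 = S_MOV_B32 1024

instead of the copy being handed to moveToVALU(), which would also force
the copy's SALU users onto the VALU.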

Reviewers: arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, llvm-commits, tony-tye

Differential Revision: https://reviews.llvm.org/D27272

llvm-svn: 288849
---

diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 8c4e0ad..6a422e7 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -294,6 +294,38 @@ static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
   return false;
 }
 
+static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
+                                    const MachineInstr *MoveImm,
+                                    const SIInstrInfo *TII,
+                                    unsigned &SMovOp,
+                                    int64_t &Imm) {
+
+  if (!MoveImm->isMoveImmediate())
+    return false;
+
+  const MachineOperand *ImmOp =
+      TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
+  if (!ImmOp->isImm())
+    return false;
+
+  // FIXME: Handle copies with sub-regs.
+  if (Copy->getOperand(0).getSubReg())
+    return false;
+
+  switch (MoveImm->getOpcode()) {
+  default:
+    return false;
+  case AMDGPU::V_MOV_B32_e32:
+    SMovOp = AMDGPU::S_MOV_B32;
+    break;
+  case AMDGPU::V_MOV_B64_PSEUDO:
+    SMovOp = AMDGPU::S_MOV_B64;
+    break;
+  }
+  Imm = ImmOp->getImm();
+  return true;
+}
+
 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -323,7 +355,17 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         const TargetRegisterClass *SrcRC, *DstRC;
         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
-          DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
+          MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+          unsigned SMovOp;
+          int64_t Imm;
+          // If we are just copying an immediate, we can replace the copy with
+          // s_mov_b32.
+          if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
+            MI.getOperand(1).ChangeToImmediate(Imm);
+            MI.addImplicitDefUseOperands(MF);
+            MI.setDesc(TII->get(SMovOp));
+            break;
+          }
           TII->moveToVALU(MI);
         }
 
diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
index ff01306..37083fb 100644
--- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -478,5 +478,30 @@ bb4:
   br label %bb1
 }
 
+; GCN-LABEL: {{^}}phi_imm_in_sgprs
+; GCN: s_movk_i32 [[A:s[0-9]+]], 0x400
+; GCN: s_movk_i32 [[B:s[0-9]+]], 0x400
+; GCN: [[LOOP_LABEL:[0-9a-zA-Z_]+]]:
+; GCN: s_xor_b32 [[B]], [[B]], [[A]]
+; GCN: s_cbranch_scc{{[01]}} [[LOOP_LABEL]]
+define void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [0, %entry], [%i.add, %loop]
+  %offset = phi i32 [1024, %entry], [%offset.xor, %loop]
+  %offset.xor = xor i32 %offset, 1024
+  %offset.i = add i32 %offset.xor, %i
+  %ptr = getelementptr i32, i32 addrspace(3)* %out, i32 %offset.i
+  store i32 0, i32 addrspace(3)* %ptr
+  %i.add = add i32 %i, 1
+  %cmp = icmp ult i32 %i.add, %cond
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
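
To exercise the new test by hand, the invocation is roughly the
following (the exact -mcpu values come from the file's existing RUN
lines, which this hunk does not show):

    llc -march=amdgcn -verify-machineinstrs \
        < llvm/test/CodeGen/AMDGPU/salu-to-valu.ll \
      | FileCheck -check-prefix=GCN llvm/test/CodeGen/AMDGPU/salu-to-valu.ll

The GCN checks pin the intended codegen: the two 0x400 immediates and
the xor of the phi values stay on the SALU (s_movk_i32, s_xor_b32)
rather than being moved to the VALU (v_mov_b32, v_xor_b32).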