From: Matt Arsenault Date: Sat, 27 Aug 2016 01:00:37 +0000 (+0000) Subject: AMDGPU: Move cndmask pseudo to be isel pseudo X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=22e417956d67efb602a756eb95ab61e48482a3f6;p=platform%2Fupstream%2Fllvm.git AMDGPU: Move cndmask pseudo to be isel pseudo There's only one use of this for the convenience of a pattern. I think v_mov_b64_pseudo should also be moved, but SIFoldOperands does currently make use of it. llvm-svn: 279901 --- diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1e31c74f..28d9322 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1453,6 +1453,36 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( return emitIndirectDst(MI, *BB, getSubtarget()->getInstrInfo()); case AMDGPU::SI_KILL: return splitKillBlock(MI, BB); + case AMDGPU::V_CNDMASK_B64_PSEUDO: { + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Src0 = MI.getOperand(1).getReg(); + unsigned Src1 = MI.getOperand(2).getReg(); + const DebugLoc &DL = MI.getDebugLoc(); + unsigned SrcCond = MI.getOperand(3).getReg(); + + unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo) + .addReg(Src0, 0, AMDGPU::sub0) + .addReg(Src1, 0, AMDGPU::sub0) + .addReg(SrcCond); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi) + .addReg(Src0, 0, AMDGPU::sub1) + .addReg(Src1, 0, AMDGPU::sub1) + .addReg(SrcCond); + + BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst) + .addReg(DstLo) + .addImm(AMDGPU::sub0) + .addReg(DstHi) + .addImm(AMDGPU::sub1); + MI.eraseFromParent(); + return BB; + } default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 433c0f2..2e7ca45 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -880,29 +880,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.eraseFromParent(); break; } - - case AMDGPU::V_CNDMASK_B64_PSEUDO: { - unsigned Dst = MI.getOperand(0).getReg(); - unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); - unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); - unsigned Src0 = MI.getOperand(1).getReg(); - unsigned Src1 = MI.getOperand(2).getReg(); - const MachineOperand &SrcCond = MI.getOperand(3); - - BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo) - .addReg(RI.getSubReg(Src0, AMDGPU::sub0)) - .addReg(RI.getSubReg(Src1, AMDGPU::sub0)) - .addReg(SrcCond.getReg()) - .addReg(Dst, RegState::Implicit | RegState::Define); - BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi) - .addReg(RI.getSubReg(Src0, AMDGPU::sub1)) - .addReg(RI.getSubReg(Src1, AMDGPU::sub1)) - .addReg(SrcCond.getReg(), getKillRegState(SrcCond.isKill())) - .addReg(Dst, RegState::Implicit | RegState::Define); - MI.eraseFromParent(); - break; - } - case AMDGPU::SI_PC_ADD_REL_OFFSET: { MachineFunction &MF = *MBB.getParent(); unsigned Reg = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index ad45df0..e02e2ae 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1781,6 +1781,7 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst), (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []> { let isPseudo = 1; let isCodeGenOnly = 1; + let usesCustomInserter = 1; } // 64-bit vector move instruction. This is mainly used by the SIFoldOperands diff --git a/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll b/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll index ea708a2..904dfe3 100644 --- a/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll @@ -14,9 +14,9 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone ; CI: v_floor_f64_e32 ; SI: v_fract_f64_e32 ; SI-DAG: v_min_f64 -; SI-DAG: v_cmp_class_f64_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI-DAG: v_cmp_class_f64_e64 vcc +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 ; SI: v_add_f64 ; SI: s_endpgm define void @ffloor_f64(double addrspace(1)* %out, double %x) { @@ -29,9 +29,9 @@ define void @ffloor_f64(double addrspace(1)* %out, double %x) { ; CI: v_floor_f64_e64 ; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT:s[[0-9]+:[0-9]+]]] ; SI-DAG: v_min_f64 -; SI-DAG: v_cmp_class_f64_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI-DAG: v_cmp_class_f64_e64 vcc +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]] ; SI: s_endpgm define void @ffloor_f64_neg(double addrspace(1)* %out, double %x) { @@ -45,9 +45,9 @@ define void @ffloor_f64_neg(double addrspace(1)* %out, double %x) { ; CI: v_floor_f64_e64 ; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT:s[[0-9]+:[0-9]+]]]| ; SI-DAG: v_min_f64 -; SI-DAG: v_cmp_class_f64_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI-DAG: v_cmp_class_f64_e64 vcc +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]| ; SI: s_endpgm define void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) { diff --git a/llvm/test/CodeGen/AMDGPU/fract.f64.ll b/llvm/test/CodeGen/AMDGPU/fract.f64.ll index 68b8843..29b90a4 100644 --- a/llvm/test/CodeGen/AMDGPU/fract.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fract.f64.ll @@ -13,9 +13,9 @@ declare double @llvm.floor.f64(double) #0 ; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1 ; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff ; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] -; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3 -; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]] -; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]] +; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3 +; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc +; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc ; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}} ; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]] @@ -40,9 +40,9 @@ define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1 ; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1 ; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff ; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] -; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3 -; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]] -; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]] +; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3 +; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc +; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc ; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}} ; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]] @@ -68,9 +68,9 @@ define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src) ; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1 ; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff ; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] -; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3 -; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]] -; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]] +; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3 +; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc +; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc ; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}} ; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -[[SUB0]]