From 70603dcef22e6cd27d5dd7e544a0c479ba4f0dc5 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 24 Mar 2017 18:55:20 +0000 Subject: [PATCH] [AMDGPU] Fold V_CNDMASK with identical source operands Such instructions sometimes appear after lowering and folding. Differential Revision: https://reviews.llvm.org/D31318 llvm-svn: 298723 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 29 ++++++++++++++++++++++++++ llvm/test/CodeGen/AMDGPU/fold-cndmask.mir | 34 +++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-cndmask.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 0a0584d..04922f6 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -591,6 +591,32 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI, return false; } +// Try to fold an instruction into a simpler one +static bool tryFoldInst(const SIInstrInfo *TII, + MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + + if (Opc == AMDGPU::V_CNDMASK_B32_e32 || + Opc == AMDGPU::V_CNDMASK_B32_e64 || + Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) { + const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); + const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1); + if (Src1->isIdenticalTo(*Src0)) { + DEBUG(dbgs() << "Folded " << *MI << " into "); + int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); + if (Src2Idx != -1) + MI->RemoveOperand(Src2Idx); + MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1)); + mutateCopyOp(*MI, TII->get(Src0->isReg() ? 
(unsigned)AMDGPU::COPY + : getMovOpc(false))); + DEBUG(dbgs() << *MI << '\n'); + return true; + } + } + + return false; +} + void SIFoldOperands::foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const { // We need mutate the operands of new mov instructions to add implicit @@ -692,6 +718,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, } DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n'); + tryFoldInst(TII, Fold.UseMI); } } } @@ -907,6 +934,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { Next = std::next(I); MachineInstr &MI = *I; + tryFoldInst(TII, &MI); + if (!isFoldableCopy(MI)) { if (IsIEEEMode || !tryFoldOMod(MI)) tryFoldClamp(MI); diff --git a/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir b/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir new file mode 100644 index 0000000..8dfec91 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir @@ -0,0 +1,34 @@ +# RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s + +# CHECK: %1 = V_MOV_B32_e32 0, implicit %exec +# CHECK: %2 = V_MOV_B32_e32 0, implicit %exec +# CHECK: %4 = COPY %3 +# CHECK: %5 = V_MOV_B32_e32 0, implicit %exec +# CHECK: %6 = V_MOV_B32_e32 0, implicit %exec +# CHECK: %7 = COPY %3 + +--- +name: fold_cndmask +tracksRegLiveness: true +registers: + - { id: 0, class: sgpr_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } + - { id: 5, class: vgpr_32 } + - { id: 6, class: vgpr_32 } + - { id: 7, class: vgpr_32 } +body: | + bb.0.entry: + %0 = IMPLICIT_DEF + %1 = V_CNDMASK_B32_e64 0, 0, %0, implicit %exec + %2 = V_CNDMASK_B32_e64 %1, %1, %0, implicit %exec + %3 = IMPLICIT_DEF + %4 = V_CNDMASK_B32_e64 %3, %3, %0, implicit %exec + %5 = COPY %1 + %6 = V_CNDMASK_B32_e64 %5, 0, %0, implicit %exec + %vcc = IMPLICIT_DEF + %7 = V_CNDMASK_B32_e32 %3, %3, implicit %exec, implicit %vcc + +... 
-- 2.7.4