From: Stanislav Mekhanoshin Date: Thu, 10 Mar 2022 21:46:55 +0000 (-0800) Subject: [AMDGPU] Support v_mov_b64 in dpp combine X-Git-Tag: upstream/15.0.7~13888 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=31f215ab0c9f99eba282d264521b48c5483ff712;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Support v_mov_b64 in dpp combine Differential Revision: https://reviews.llvm.org/D121411 --- diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index a8c85ec..877b98d 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -167,7 +167,9 @@ MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const { return nullptr; case AMDGPU::COPY: case AMDGPU::V_MOV_B32_e32: - case AMDGPU::V_MOV_B64_PSEUDO: { + case AMDGPU::V_MOV_B64_PSEUDO: + case AMDGPU::V_MOV_B64_e32: + case AMDGPU::V_MOV_B64_e64: { auto &Op1 = Def->getOperand(1); if (Op1.isImm()) return &Op1; @@ -183,6 +185,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, bool CombBCZ, bool IsShrinkable) const { assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp || + MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp || MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); auto OrigOp = OrigMI.getOpcode(); @@ -383,6 +386,7 @@ bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName, bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp || + MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp || MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI); @@ -399,7 +403,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { return false; } - if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) { + if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO || + MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) { auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl); assert(DppCtrl && 
DppCtrl->isImm()); if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) { @@ -616,7 +621,8 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) { if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) { Changed = true; ++NumDPPMovsCombined; - } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) { + } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO || + MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) { if (ST->has64BitDPP() && combineDPPMov(MI)) { Changed = true; ++NumDPPMovsCombined; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 542095c..6fcd2d9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2157,6 +2157,13 @@ std::pair<MachineInstr*, MachineInstr*> SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); + if (ST.hasMovB64() && + AMDGPU::isLegal64BitDPPControl( + getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) { + MI.setDesc(get(AMDGPU::V_MOV_B64_dpp)); + return std::make_pair(&MI, nullptr); + } + MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MBB.findDebugLoc(MI); MachineFunction *MF = MBB.getParent(); diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll index df010bb..e55c804 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll +++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A +; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10 ; GCN-LABEL: {{^}}dpp64_ceil: @@ -50,6 +51,7 @@ define amdgpu_kernel void @dpp64_rcp_unsupported_ctl(i64 addrspace(1)* %arg, i64 ; GCN-LABEL: {{^}}dpp64_div: ; GCN: global_load_dwordx2 [[V:v\[[0-9:]+\]]], +; 
DPPMOV64: v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} ; GFX90A-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} ; GFX10-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} ; GCN: v_div_scale_f64 diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir index 8199f21..dfaa7b4 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir @@ -1,4 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN +# RUN: llc -march=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN --- # GCN-LABEL: name: dpp64_old_impdef