From: Stanislav Mekhanoshin Date: Tue, 22 Oct 2019 18:07:15 +0000 (-0700) Subject: [AMDGPU] Allow tied operand subreg folding X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=48f57138be55a939afc64d357f7b26220206127a;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Allow tied operand subreg folding It turns out this folding makes sense, contrary to what the removed comment said. Differential Revision: https://reviews.llvm.org/D69287 --- diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 4eac031..69ac367 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -513,18 +513,6 @@ void SIFoldOperands::foldOperand( if (UseOp.isReg() && OpToFold.isReg()) { if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister) return; - - // Don't fold subregister extracts into tied operands, only if it is a full - // copy since a subregister use tied to a full register def doesn't really - // make sense. e.g. don't fold: - // - // %1 = COPY %0:sub1 - // %2 = V_MAC_{F16, F32} %3, %4, %1 - // - // into - // %2 = V_MAC_{F16, F32} %3, %4, %0:sub1 - if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister) - return; } // Special case for REG_SEQUENCE: We can't fold literals into diff --git a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir index 7d1c75c..db57d77 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir @@ -25,3 +25,19 @@ body: | DS_WRITE2_B32_gfx9 %2, killed %4, killed %3, 0, 1, 0, implicit $exec ... 
+ +# GCN-LABEL: name: fma_sgpr_use +# GCN: %0:sreg_64_xexec = IMPLICIT_DEF +# GCN: %4:vgpr_32 = nnan ninf nsz arcp contract afn reassoc V_FMAC_F32_e64 2, %0.sub0, 0, 1073741824, 0, %3, 0, 0, implicit $exec + +--- +name: fma_sgpr_use +body: | + bb.0: + %0:sreg_64_xexec = IMPLICIT_DEF + %1:sgpr_32 = COPY %0.sub0 + %2:sgpr_32 = COPY %0.sub1 + %3:vgpr_32 = COPY %2 + %4:vgpr_32 = nnan ninf nsz arcp contract afn reassoc V_FMAC_F32_e64 2, %1, 0, 1073741824, 0, %3, 0, 0, implicit $exec + DS_WRITE2_B32_gfx9 undef %5:vgpr_32, killed %4, undef %6:vgpr_32, 0, 1, 0, implicit $exec +...