From ce9633633c07045068d202cc225dbf2deb7e4e8b Mon Sep 17 00:00:00 2001 From: Shu-Chun Weng Date: Tue, 4 Feb 2020 15:52:57 -0800 Subject: [PATCH] [GlobalISel][AArch64] Fix contract cross-bank copies with SIMD instructions contractCrossBankCopyIntoStore() finds the instruction defines the source register and uses its output to replace the register. There are, however, instructions that have multiple outputs, e.g. G_UNMERGE_VALUES. Current implementation hardcodes to operand 0 and has no way of knowing which output should be used. This change adds another function to directly return the register that is the source of the register and use that for folding. This fixes https://bugs.llvm.org/show_bug.cgi?id=44783 Differential Revision: https://reviews.llvm.org/D74005 --- llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 7 ++++ llvm/lib/CodeGen/GlobalISel/Utils.cpp | 35 +++++++++++++++---- .../Target/AArch64/AArch64InstructionSelector.cpp | 5 ++- .../CodeGen/AArch64/GlobalISel/contract-store.mir | 40 ++++++++++++++++++++++ 4 files changed, 78 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 351a855..63c5746 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -144,6 +144,13 @@ MachineInstr *getOpcodeDef(unsigned Opcode, Register Reg, MachineInstr *getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI); +/// Find the source register for \p Reg, folding away any trivial copies. It +/// will be an output register of the instruction that getDefIgnoringCopies +/// returns. May return an invalid register if \p Reg is not a generic virtual +/// register. +Register getSrcRegIgnoringCopies(Register Reg, + const MachineRegisterInfo &MRI); + /// Returns an APFloat from Val converted to the appropriate size. APFloat getAPFloatFromSize(double Val, unsigned Size); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index b4df898..5f72974 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -292,28 +292,51 @@ Optional llvm::getConstantVRegValWithLookThrough( return ValueAndVReg{Val.getSExtValue(), VReg}; } -const llvm::ConstantFP* llvm::getConstantFPVRegVal(Register VReg, - const MachineRegisterInfo &MRI) { +const llvm::ConstantFP * +llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) { MachineInstr *MI = MRI.getVRegDef(VReg); if (TargetOpcode::G_FCONSTANT != MI->getOpcode()) return nullptr; return MI->getOperand(1).getFPImm(); } -llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg, - const MachineRegisterInfo &MRI) { +namespace { +struct DefinitionAndSourceRegister { + llvm::MachineInstr *MI; + Register Reg; +}; +} // namespace + +static llvm::Optional +getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { + Register DefSrcReg = Reg; auto *DefMI = MRI.getVRegDef(Reg); auto DstTy = MRI.getType(DefMI->getOperand(0).getReg()); if (!DstTy.isValid()) - return nullptr; + return None; while (DefMI->getOpcode() == TargetOpcode::COPY) { Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); if (!SrcTy.isValid() || SrcTy != DstTy) break; DefMI = MRI.getVRegDef(SrcReg); + DefSrcReg = SrcReg; } - return DefMI; + return DefinitionAndSourceRegister{DefMI, DefSrcReg}; +} + +llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg, + const MachineRegisterInfo &MRI) { + Optional DefSrcReg = + getDefSrcRegIgnoringCopies(Reg, MRI); + return DefSrcReg ? DefSrcReg->MI : nullptr; +} + +Register llvm::getSrcRegIgnoringCopies(Register Reg, + const MachineRegisterInfo &MRI) { + Optional DefSrcReg = + getDefSrcRegIgnoringCopies(Reg, MRI); + return DefSrcReg ? DefSrcReg->Reg : Register(); } llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg, diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index d91a672..f933db5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -1590,10 +1590,9 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( // G_STORE %x:gpr(s32) // // And then continue the selection process normally. - MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI); - if (!Def) + Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); + if (!DefDstReg.isValid()) return false; - Register DefDstReg = Def->getOperand(0).getReg(); LLT DefDstTy = MRI.getType(DefDstReg); Register StoreSrcReg = I.getOperand(0).getReg(); LLT StoreSrcTy = MRI.getType(StoreSrcReg); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir index f1b40c1..8927588 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir @@ -7,6 +7,8 @@ define void @contract_s64_fpr(i64* %addr) { ret void } define void @contract_s32_fpr(i32* %addr) { ret void } define void @contract_s16_fpr(i16* %addr) { ret void } + define void @contract_g_unmerge_values_first(i128* %addr) { ret void } + define void @contract_g_unmerge_values_second(i128* %addr) { ret void } ... --- name: contract_s64_gpr @@ -87,3 +89,41 @@ body: | %1:fpr(s16) = COPY $h1 %2:gpr(s16) = COPY %1 G_STORE %2:gpr(s16), %0 :: (store 2 into %ir.addr) +... +--- +name: contract_g_unmerge_values_first +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: contract_g_unmerge_values_first + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LOAD]].dsub + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load 16 from %ir.addr) + %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>) + %4:gpr(s64) = COPY %2 + %5:gpr(s64) = COPY %3 + G_STORE %4:gpr(s64), %0 :: (store 8 into %ir.addr) +... +--- +name: contract_g_unmerge_values_second +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: contract_g_unmerge_values_second + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = CPYi64 [[LOAD]], 1 + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load 16 from %ir.addr) + %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>) + %4:gpr(s64) = COPY %2 + %5:gpr(s64) = COPY %3 + G_STORE %5:gpr(s64), %0 :: (store 8 into %ir.addr) -- 2.7.4