From a9cc3856647a26c742d06948320781f93c3e04e3 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 26 Oct 2016 20:01:00 +0000 Subject: [PATCH] ARM: don't rely on push/pop reglists being in order when folding SP adjust. It would be a very nice invariant to rely on, but unfortunately it doesn't necessarily hold (and the causes of mis-sorted reglists appear to be quite varied) so to be robust the frame lowering code can't assume that the first register in the list is also the first one that actually gets pushed. Should fix an issue where we were turning something like: push {r8, r4, r7, lr} sub sp, #24 into nonsense like: push {r2, r3, r4, r5, r6, r7, r8, r4, r7, lr} llvm-svn: 285232 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 27 +++++++++++++++++++-------- llvm/test/CodeGen/ARM/fold-stack-adjust.ll | 14 ++++++++++++++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6aa060a..9c8229c 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2077,29 +2077,40 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, int RegListIdx = IsT1PushPop ? 2 : 4; // Calculate the space we'll need in terms of registers. - unsigned FirstReg = MI->getOperand(RegListIdx).getReg(); - unsigned RD0Reg, RegsNeeded; + unsigned RegsNeeded; + const TargetRegisterClass *RegClass; if (IsVFPPushPop) { - RD0Reg = ARM::D0; RegsNeeded = NumBytes / 8; + RegClass = &ARM::DPRRegClass; } else { - RD0Reg = ARM::R0; RegsNeeded = NumBytes / 4; + RegClass = &ARM::GPRRegClass; } // We're going to have to strip all list operands off before // re-adding them since the order matters, so save the existing ones // for later. 
SmallVector<MachineOperand, 4> RegList; - for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) - RegList.push_back(MI->getOperand(i)); + + // We're also going to need the first register transferred by this + // instruction, which won't necessarily be the first register in the list. + unsigned FirstRegEnc = -1; const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); + for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) { + MachineOperand &MO = MI->getOperand(i); + RegList.push_back(MO); + + if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc) + FirstRegEnc = TRI->getEncodingValue(MO.getReg()); + } + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); // Now try to find enough space in the reglist to allocate NumBytes. - for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded; - --CurReg) { + for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded; + --CurRegEnc) { + unsigned CurReg = RegClass->getRegister(CurRegEnc); if (!IsPop) { // Pushing any register is completely harmless, mark the // register involved as undef since we don't care about it in diff --git a/llvm/test/CodeGen/ARM/fold-stack-adjust.ll b/llvm/test/CodeGen/ARM/fold-stack-adjust.ll index 8944a40..442459b 100644 --- a/llvm/test/CodeGen/ARM/fold-stack-adjust.ll +++ b/llvm/test/CodeGen/ARM/fold-stack-adjust.ll @@ -218,4 +218,18 @@ exit: ; preds = %if.then, %entry ret float %call1 } +declare void @use_arr(i32*) +define void @test_fold_reuse() minsize { +; CHECK-LABEL: test_fold_reuse: +; CHECK: push.w {r4, r7, r8, lr} +; CHECK: sub sp, #24 +; [...] +; CHECK: add sp, #24 +; CHECK: pop.w {r4, r7, r8, pc} + %arr = alloca i8, i32 24 + call void asm sideeffect "", "~{r8},~{r4}"() + call void @bar(i8* %arr) + ret void +} + declare void @llvm.va_start(i8*) nounwind -- 2.7.4