From: Francis Visoiu Mistrih
Date: Fri, 27 Apr 2018 15:30:54 +0000 (+0000)
Subject: [AArch64] Place the first ldp at the end when ReverseCSRRestoreSeq is true
X-Git-Tag: llvmorg-7.0.0-rc1~7226
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c855e92ca9cf817d0e816f29ca1bb2edd9380f2e;p=platform%2Fupstream%2Fllvm.git

[AArch64] Place the first ldp at the end when ReverseCSRRestoreSeq is true

Put the first ldp at the end, so that the load-store optimizer can run
and merge the ldp and the add into a post-index ldp.

Previously, this reordering did not happen when no frame was needed,
which resulted in code size regressions.

llvm-svn: 331044
---
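A note on the mechanics: the move performed by adaptForLdStOpt uses
MachineBasicBlock::splice, which behaves like std::list::splice: the single
instruction at LastPopI is unlinked and re-inserted immediately before
FirstSPPopI. Below is a minimal standalone sketch of that move, with a
std::list of strings standing in for the instruction list (the instruction
text mirrors the comments in the patch; everything in the sketch is
illustrative, not LLVM API):

    #include <iostream>
    #include <iterator>
    #include <list>
    #include <string>

    int main() {
      // The epilogue as emitted when registers are restored in save order.
      std::list<std::string> Epilogue = {
          "ldp x26, x25, [sp]",      // LastPopI: the pop loading from [sp]
          "ldp x24, x23, [sp, #16]",
          "ldp x22, x21, [sp, #32]",
          "ldp x20, x19, [sp, #48]",
          "add sp, sp, #64",         // FirstSPPopI: the final SP adjustment
      };

      auto LastPopI = Epilogue.begin();
      auto FirstSPPopI = std::prev(Epilogue.end());

      // Same shape as MBB.splice(FirstSPPopI, &MBB, LastPopI): unlink the
      // element at LastPopI and re-insert it just before FirstSPPopI.
      Epilogue.splice(FirstSPPopI, Epilogue, LastPopI);

      for (const std::string &I : Epilogue)
        std::cout << I << '\n';
      // The listing now ends with:
      //   ldp x26, x25, [sp]
      //   add sp, sp, #64
      // which the load-store optimizer can merge into:
      //   ldp x26, x25, [sp], #64
    }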
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a573e2c..d233827 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -514,6 +514,38 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
   OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
 }
 
+static void adaptForLdStOpt(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator FirstSPPopI,
+                            MachineBasicBlock::iterator LastPopI) {
+  // Sometimes (when we restore in the same order as we save), we can end up
+  // with code like this:
+  //
+  // ldp x26, x25, [sp]
+  // ldp x24, x23, [sp, #16]
+  // ldp x22, x21, [sp, #32]
+  // ldp x20, x19, [sp, #48]
+  // add sp, sp, #64
+  //
+  // In this case, it is always better to put the first ldp at the end, so
+  // that the load-store optimizer can run and merge the ldp and the add into
+  // a post-index ldp.
+  // If we managed to grab the first pop instruction, move it to the end.
+  if (ReverseCSRRestoreSeq)
+    MBB.splice(FirstSPPopI, &MBB, LastPopI);
+  // We should end up with something like this now:
+  //
+  // ldp x24, x23, [sp, #16]
+  // ldp x22, x21, [sp, #32]
+  // ldp x20, x19, [sp, #48]
+  // ldp x26, x25, [sp]
+  // add sp, sp, #64
+  //
+  // and the load-store optimizer can merge the last two instructions into:
+  //
+  // ldp x26, x25, [sp], #64
+  //
+}
+
 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                         MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -930,12 +962,20 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     int StackRestoreBytes = RedZone ? 0 : NumBytes;
     if (NoCalleeSaveRestore)
       StackRestoreBytes += AfterCSRPopSize;
-    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
-                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+
     // If we were able to combine the local stack pop with the argument pop,
     // then we're done.
-    if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
+    bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
+
+    // If we're done after this, make sure to help the load store optimizer.
+    if (Done)
+      adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
+
+    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+    if (Done)
       return;
+
     NumBytes = 0;
   }
@@ -967,33 +1007,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     FirstSPPopI = Prev;
   }
 
-  // Sometimes (when we restore in the same order as we save), we can end up
-  // with code like this:
-  //
-  // ldp x26, x25, [sp]
-  // ldp x24, x23, [sp, #16]
-  // ldp x22, x21, [sp, #32]
-  // ldp x20, x19, [sp, #48]
-  // add sp, sp, #64
-  //
-  // In this case, it is always better to put the first ldp at the end, so
-  // that the load-store optimizer can run and merge the ldp and the add into
-  // a post-index ldp.
-  // If we managed to grab the first pop instruction, move it to the end.
-  if (LastPopI != Begin)
-    MBB.splice(FirstSPPopI, &MBB, LastPopI);
-  // We should end up with something like this now:
-  //
-  // ldp x24, x23, [sp, #16]
-  // ldp x22, x21, [sp, #32]
-  // ldp x20, x19, [sp, #48]
-  // ldp x26, x25, [sp]
-  // add sp, sp, #64
-  //
-  // and the load-store optimizer can merge the last two instructions into:
-  //
-  // ldp x26, x25, [sp], #64
-  //
+  adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
+
   emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
                   AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
 }
diff --git a/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir b/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir
index 32234fd..2b168b3 100644
--- a/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir
+++ b/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir
@@ -7,6 +7,8 @@
 
   define void @bar() nounwind { entry: unreachable }
 
+  define void @baz() nounwind { entry: unreachable }
+
 ...
 ---
 name: foo
@@ -71,3 +73,34 @@ body: |
   ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 80, 0
   RET_ReallyLR
 ...
+---
+# Check that the load from the offset 0 is moved at the end even when hasFP is
+# false.
+name: baz
+# CHECK-LABEL: name: baz
+alignment: 2
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack: true
+  hasCalls: true
+body: |
+  bb.0:
+    successors: %bb.1
+
+    $x0 = IMPLICIT_DEF
+    $x20 = IMPLICIT_DEF
+    $x21 = IMPLICIT_DEF
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    B %bb.1
+
+  bb.1:
+    ; CHECK: $x20, $lr = frame-destroy LDPXi $sp, 2
+    ; BEFORELDSTOPT-NEXT: $x21 = frame-destroy LDRXui $sp, 0
+    ; BEFORELDSTOPT-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
+
+    ; AFTERLDSTOPT-NEXT: early-clobber $sp, $x21 = frame-destroy LDRXpost $sp, 32
+    RET_ReallyLR
+...
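For reference, the offsets in the baz checks line up as follows, assuming the
usual scaled-immediate MIR encodings (LDPXi/LDRXui immediates are in 8-byte
units; the LDRXpost immediate is a byte offset):

    x21      -> sp + 0   (LDRXui $sp, 0:   0 * 8 = 0)
    padding  -> sp + 8   (three 8-byte saves rounded up to 16-byte alignment)
    x20, lr  -> sp + 16  (LDPXi  $sp, 2:   2 * 8 = 16)
    frame    =  32 bytes (ADDXri $sp, 32, 0)

Because the offset-0 load is now placed last, directly before the SP
increment, the load-store optimizer can fold the pair into the single
post-index reload checked under AFTERLDSTOPT: LDRXpost $sp, 32.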