From 02709bcb78a4c209d2bbe6541ae32308bc7c2833 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Wed, 16 May 2018 19:49:01 +0000 Subject: [PATCH] [MachineOutliner] Don't save/restore LR for tail calls. The cost computation assumes we do this correctly, but the actual lowering was wrong. Differential Revision: https://reviews.llvm.org/D46923 llvm-svn: 332514 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 ++++--- llvm/test/CodeGen/AArch64/machine-outliner-tail.ll | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-tail.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 879d91d..d97d355 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5255,10 +5255,11 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { void AArch64InstrInfo::insertOutlinerEpilogue( MachineBasicBlock &MBB, MachineFunction &MF, const MachineOutlinerInfo &MInfo) const { - // Is there a call in the outlined range? - if (std::any_of(MBB.instr_begin(), MBB.instr_end(), - [](MachineInstr &MI) { return MI.isCall(); })) { + auto IsNonTailCall = [](MachineInstr &MI) { + return MI.isCall() && !MI.isReturn(); + }; + if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) { // Fix up the instructions in the range, since we're going to modify the // stack. fixupPostOutline(MBB); diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-tail.ll b/llvm/test/CodeGen/AArch64/machine-outliner-tail.ll new file mode 100644 index 0000000..751128c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-tail.ll @@ -0,0 +1,22 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-linux-gnu < %s | FileCheck %s + +; CHECK: OUTLINED_FUNCTION_0: +; CHECK: orr w0, wzr, #0x1 +; CHECK-NEXT: orr w1, wzr, #0x2 +; CHECK-NEXT: orr w2, wzr, #0x3 +; CHECK-NEXT: orr w3, wzr, #0x4 +; CHECK-NEXT: b z + +define void @a() { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} + +declare void @z(i32, i32, i32, i32) + +define dso_local void @b(i32* nocapture readnone %p) { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} -- 2.7.4