From a42070a6aa8f32165cdce3e1ff6304c3cd0b8ff6 Mon Sep 17 00:00:00 2001
From: Jessica Paquette <jpaquette@apple.com>
Date: Thu, 12 Sep 2019 22:10:36 +0000
Subject: [PATCH] [AArch64][GlobalISel] Support sibling calls with outgoing arguments

This adds support for lowering sibling calls with outgoing arguments.
e.g.

```
define void @foo(i32 %a)
```

Support is ported from AArch64ISelLowering's
`isEligibleForTailCallOptimization`. The only thing that is missing is a full
port of `TargetLowering::parametersInCSRMatch`. So, if we're using swiftself,
we'll never tail call.

- Rename `analyzeCallResult` to `analyzeArgInfo`, since the function is now
  used for both outgoing and incoming arguments
- Teach `OutgoingArgHandler` about tail calls. Tail calls use frame indices
  for stack arguments.
- Teach `lowerFormalArguments` to set the bytes in the caller's stack argument
  area. This is used later to check if the tail call's parameters will fit on
  the caller's stack.
- Add `areCalleeOutgoingArgsTailCallable` to perform the eligibility check on
  the callee's outgoing arguments.

For testing:

- Update call-translator-tail-call to verify that we can now tail call with
  outgoing arguments, use G_FRAME_INDEX for stack arguments, and respect the
  size of the caller's stack
- Remove GISel-specific check lines from speculation-hardening.ll, since GISel
  now tail calls like the other selectors
- Add a GISel test line to tailcall-string-rvo.ll since we can tail call in
  that test now
- Add a GISel test line to tailcall_misched_graph.ll since we tail call there
  now. Add specific check lines for GISel, since the debug output from the
  machine-scheduler differs with GlobalISel. The dependency still holds, but
  the output comes out in a different order.

Differential Revision: https://reviews.llvm.org/D67471

llvm-svn: 371780
---
 .../include/llvm/CodeGen/GlobalISel/CallLowering.h |  10 +-
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp       |   6 +-
 llvm/lib/Target/AArch64/AArch64CallLowering.cpp    | 117 +++++++++++++++++----
 llvm/lib/Target/AArch64/AArch64CallLowering.h      |   7 +-
 .../GlobalISel/call-translator-tail-call.ll        |  35 ++++--
 llvm/test/CodeGen/AArch64/arm64-call-tailcalls.ll  |   1 +
 llvm/test/CodeGen/AArch64/speculation-hardening.ll |  37 +++----
 llvm/test/CodeGen/AArch64/tailcall-string-rvo.ll   |   1 +
 .../test/CodeGen/AArch64/tailcall_misched_graph.ll |  46 ++++----
 9 files changed, 183 insertions(+), 77 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index e2135fc..6e54e70 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -211,10 +211,12 @@ protected:
                          SmallVectorImpl<ArgInfo> &Args,
                          ValueHandler &Handler) const;
 
-  /// Analyze the return values of a call, incorporating info about the passed
-  /// values into \p CCState.
-  bool analyzeCallResult(CCState &CCState, SmallVectorImpl<ArgInfo> &Args,
-                         CCAssignFn &Fn) const;
+  /// Analyze passed or returned values from a call, supplied in \p ArgInfo,
+  /// incorporating info about the passed values into \p CCState.
+  ///
+  /// Used to check if arguments are suitable for tail call lowering.
+  bool analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args,
+                      CCAssignFn &Fn) const;
 
   /// \returns True if the calling convention for a callee and its caller pass
   /// results in the same way. Typically used for tail call eligibility checks.
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index ccbe56d..9280c9e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -378,7 +378,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
   return true;
 }
 
-bool CallLowering::analyzeCallResult(CCState &CCState,
+bool CallLowering::analyzeArgInfo(CCState &CCState,
                                      SmallVectorImpl<ArgInfo> &Args,
                                      CCAssignFn &Fn) const {
   for (unsigned i = 0, e = Args.size(); i < e; ++i) {
@@ -407,12 +407,12 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
 
   SmallVector<CCValAssign, 16> ArgLocs1;
   CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext());
-  if (!analyzeCallResult(CCInfo1, InArgs, CalleeAssignFn))
+  if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFn))
     return false;
 
   SmallVector<CCValAssign, 16> ArgLocs2;
   CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext());
-  if (!analyzeCallResult(CCInfo2, InArgs, CallerAssignFn))
+  if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFn))
     return false;
 
   // We need the argument locations to match up exactly. If there's more in
diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index c5d4b18..bad8fb3 100644
--- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -130,14 +130,26 @@ struct CallReturnHandler : public IncomingArgHandler {
 struct OutgoingArgHandler : public CallLowering::ValueHandler {
   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                      MachineInstrBuilder MIB, CCAssignFn *AssignFn,
-                     CCAssignFn *AssignFnVarArg)
+                     CCAssignFn *AssignFnVarArg, bool IsTailCall = false)
       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
-        AssignFnVarArg(AssignFnVarArg), StackSize(0) {}
+        AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), StackSize(0) {}
 
   Register getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
+    MachineFunction &MF = MIRBuilder.getMF();
     LLT p0 = LLT::pointer(0, 64);
     LLT s64 = LLT::scalar(64);
+
+    if (IsTailCall) {
+      // TODO: For -tailcallopt tail calls, Offset will need FPDiff like in
+      // ISelLowering.
+      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
+      Register FIReg = MRI.createGenericVirtualRegister(p0);
+      MIRBuilder.buildFrameIndex(FIReg, FI);
+      MPO = MachinePointerInfo::getFixedStack(MF, FI);
+      return FIReg;
+    }
+
     Register SPReg = MRI.createGenericVirtualRegister(p0);
     MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));
@@ -147,7 +159,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
     Register AddrReg = MRI.createGenericVirtualRegister(p0);
     MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
 
-    MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+    MPO = MachinePointerInfo::getStack(MF, Offset);
     return AddrReg;
   }
@@ -188,6 +200,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
 
   MachineInstrBuilder MIB;
   CCAssignFn *AssignFnVarArg;
+  bool IsTailCall;
   uint64_t StackSize;
 };
 } // namespace
@@ -378,6 +391,8 @@ bool AArch64CallLowering::lowerFormalArguments(
   if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
     return false;
 
+  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+  uint64_t StackOffset = Handler.StackUsed;
   if (F.isVarArg()) {
     auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     if (!Subtarget.isTargetDarwin()) {
@@ -387,14 +402,20 @@
     }
 
     // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
-    uint64_t StackOffset =
-        alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
+    StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
 
     auto &MFI = MIRBuilder.getMF().getFrameInfo();
-    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
   }
 
+  // TODO: Port checks for stack to restore for -tailcallopt from ISelLowering.
+  // We need to keep track of the size of function stacks for tail call
+  // optimization. When we tail call, we need to check if the callee's arguments
+  // will fit on the caller's stack. So, whenever we lower formal arguments,
+  // we should keep track of this information, since we might lower a tail call
+  // in this function later.
+  FuncInfo->setBytesInStackArgArea(StackOffset);
+
   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   if (Subtarget.hasCustomCallingConv())
     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
@@ -454,9 +475,67 @@ bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
   return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
 }
 
+bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
+    CallLoweringInfo &Info, MachineFunction &MF,
+    SmallVectorImpl<ArgInfo> &OutArgs) const {
+  // If there are no outgoing arguments, then we are done.
+  if (OutArgs.empty())
+    return true;
+
+  const Function &CallerF = MF.getFunction();
+  CallingConv::ID CalleeCC = Info.CallConv;
+  CallingConv::ID CallerCC = CallerF.getCallingConv();
+  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+
+  // We have outgoing arguments. Make sure that we can tail call with them.
+  SmallVector<CCValAssign, 16> OutLocs;
+  CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
+
+  if (!analyzeArgInfo(OutInfo, OutArgs,
+                      *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg))) {
+    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
+    return false;
+  }
+
+  // Make sure that they can fit on the caller's stack.
+  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
+    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
+    return false;
+  }
+
+  // Verify that the parameters in callee-saved registers match.
+  // TODO: Port this over to CallLowering as general code once swiftself is
+  // supported.
+  auto TRI = MF.getSubtarget().getRegisterInfo();
+  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
+
+  for (auto &ArgLoc : OutLocs) {
+    // If it's not a register, it's fine.
+    if (!ArgLoc.isRegLoc())
+      continue;
+
+    Register Reg = ArgLoc.getLocReg();
+
+    // Only look at callee-saved registers.
+    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
+      continue;
+
+    // TODO: Port the remainder of this check from TargetLowering to support
+    // tail calling swiftself.
+    LLVM_DEBUG(
+        dbgs()
+        << "... Cannot handle callee-saved registers in outgoing args yet.\n");
+    return false;
+  }
+
+  return true;
+}
+
 bool AArch64CallLowering::isEligibleForTailCallOptimization(
     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
-    SmallVectorImpl<ArgInfo> &InArgs) const {
+    SmallVectorImpl<ArgInfo> &InArgs,
+    SmallVectorImpl<ArgInfo> &OutArgs) const {
   CallingConv::ID CalleeCC = Info.CallConv;
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &CallerF = MF.getFunction();
@@ -535,7 +614,8 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
   assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
          "Unexpected variadic calling convention");
 
-  // Look at the incoming values.
+  // Verify that the incoming and outgoing arguments from the callee are
+  // safe to tail call.
   if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
     LLVM_DEBUG(
         dbgs()
         << "... Caller and callee have incompatible calling conventions.\n");
     return false;
   }
 
-  // For now, only handle callees that take no arguments.
-  if (!Info.OrigArgs.empty()) {
-    LLVM_DEBUG(
-        dbgs()
-        << "... Cannot tail call callees with outgoing arguments yet.\n");
+  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
     return false;
-  }
 
   LLVM_DEBUG(
       dbgs() << "... Call is eligible for tail call optimization.\n");
@@ -592,20 +667,20 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     return false;
   }
 
-  SmallVector<ArgInfo, 8> SplitArgs;
+  SmallVector<ArgInfo, 8> OutArgs;
   for (auto &OrigArg : Info.OrigArgs) {
-    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, Info.CallConv);
+    splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
     if (OrigArg.Ty->isIntegerTy(1))
-      SplitArgs.back().Flags[0].setZExt();
+      OutArgs.back().Flags[0].setZExt();
   }
 
   SmallVector<ArgInfo, 8> InArgs;
   if (!Info.OrigRet.Ty->isVoidTy())
     splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());
 
-  bool IsSibCall = Info.IsTailCall &&
-                   isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs);
+  bool IsSibCall = Info.IsTailCall && isEligibleForTailCallOptimization(
+                                          MIRBuilder, Info, InArgs, OutArgs);
   if (IsSibCall)
     MF.getFrameInfo().setHasTailCall();
@@ -655,8 +730,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   // Do the actual argument marshalling.
   SmallVector<Register, 8> PhysRegs;
   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
-                             AssignFnVarArg);
-  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+                             AssignFnVarArg, IsSibCall);
+  if (!handleAssignments(MIRBuilder, OutArgs, Handler))
     return false;
 
   // Now we can add the actual call instruction to the correct basic block.
diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.h b/llvm/lib/Target/AArch64/AArch64CallLowering.h
index 696d4d8..4777a16 100644
--- a/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -47,7 +47,8 @@ public:
 
   bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder,
                                          CallLoweringInfo &Info,
-                                         SmallVectorImpl<ArgInfo> &InArgs) const;
+                                         SmallVectorImpl<ArgInfo> &InArgs,
+                                         SmallVectorImpl<ArgInfo> &OutArgs) const;
 
   bool supportSwiftError() const override { return true; }
 
@@ -67,6 +68,10 @@ private:
   doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info,
                                       MachineFunction &MF,
                                       SmallVectorImpl<ArgInfo> &InArgs) const;
+
+  bool
+  areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF,
+                                    SmallVectorImpl<ArgInfo> &OutArgs) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
index 54a008b..8a00889 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
@@ -24,22 +24,45 @@
 }
 
 declare void @outgoing_args_fn(i32)
-; Right now, callees with outgoing arguments should not be tail called.
-; TODO: Support this.
 define void @test_outgoing_args(i32 %a) {
   ; COMMON-LABEL: name: test_outgoing_args
   ; COMMON: bb.1 (%ir-block.0):
   ; COMMON:   liveins: $w0
   ; COMMON:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
   ; COMMON:   $w0 = COPY [[COPY]](s32)
-  ; COMMON:   BL @outgoing_args_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0
-  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; COMMON:   RET_ReallyLR
+  ; COMMON:   TCRETURNdi @outgoing_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0
   tail call void @outgoing_args_fn(i32 %a)
   ret void
 }
 
+; Verify that we create frame indices for memory arguments in tail calls.
+; We get a bunch of copies here which are unused and thus eliminated. So, let's
+; just focus on what matters, which is that we get a G_FRAME_INDEX.
+declare void @outgoing_stack_args_fn(<4 x half>)
+define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) {
+  ; COMMON-LABEL: name: test_outgoing_stack_args
+  ; COMMON: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; COMMON: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1)
+  ; COMMON: $d0 = COPY [[LOAD]](<4 x s16>)
+  ; COMMON: TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0
+  tail call void @outgoing_stack_args_fn(<4 x half> %arg)
+  ret void
+}
+
+; Verify that we don't tail call when we cannot fit arguments on the caller's
+; stack.
+declare i32 @too_big_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i8 %c, i16 %s)
+define i32 @test_too_big_stack() {
+  ; COMMON-LABEL: name: test_too_big_stack
+  ; COMMON-NOT: TCRETURNdi
+  ; COMMON-NOT: TCRETURNri
+  ; COMMON: BL @too_big_stack
+  ; COMMON-DAG: RET_ReallyLR
+entry:
+  %call = tail call i32 @too_big_stack(i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i8 8, i16 9)
+  ret i32 %call
+}
+
 ; Right now, we don't want to tail call callees with nonvoid return types, since
 ; call lowering will insert COPYs after the call.
 ; TODO: Support this.
diff --git a/llvm/test/CodeGen/AArch64/arm64-call-tailcalls.ll b/llvm/test/CodeGen/AArch64/arm64-call-tailcalls.ll
index 7a91f05..9b6d1f3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-call-tailcalls.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-call-tailcalls.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
+; RUN: llc -global-isel < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
 
 @t = weak global i32 ()* null
 @x = external global i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.ll b/llvm/test/CodeGen/AArch64/speculation-hardening.ll
index a6c3ed6..81aea75 100644
--- a/llvm/test/CodeGen/AArch64/speculation-hardening.ll
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening.ll
@@ -1,9 +1,9 @@
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH,GISELSLH --dump-input-on-failure
-; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,GISELNOSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
 
 define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR {
 ; CHECK-LABEL: f
@@ -54,24 +54,13 @@ return:                                           ; preds = %entry, %if.then
 
 ; Make sure that for a tail call, taint doesn't get put into SP twice.
 define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR {
 ; CHECK-LABEL: tail_caller:
-; NOGISELSLH: mov [[TMPREG:x[0-9]+]], sp
-; NOGISELSLH: and [[TMPREG]], [[TMPREG]], x16
-; NOGISELSLH: mov sp, [[TMPREG]]
-; NOGISELNOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp
-; NOGISELNOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16
-; NOGISELNOSLH-NOT: mov sp, [[TMPREG]]
-; GISELSLH: mov [[TMPREG:x[0-9]+]], sp
-; GISELSLH: and [[TMPREG]], [[TMPREG]], x16
-; GISELSLH: mov sp, [[TMPREG]]
-; GISELNOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp
-; GISELNOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16
-; GISELNOSLH-NOT: mov sp, [[TMPREG]]
-; GlobalISel doesn't optimize tail calls (yet?), so only check that
-; cross-call taint register setup code is missing if a tail call was
-; actually produced.
-; NOGISELSLH: b tail_callee
-; GISELSLH: bl tail_callee
-; GISELSLH: cmp sp, #0
+; SLH: mov [[TMPREG:x[0-9]+]], sp
+; SLH: and [[TMPREG]], [[TMPREG]], x16
+; SLH: mov sp, [[TMPREG]]
+; NOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp
+; NOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16
+; NOSLH-NOT: mov sp, [[TMPREG]]
+; SLH: b tail_callee
 ; SLH-NOT: cmp sp, #0
   %call = tail call i32 @tail_callee(i32 %a)
   ret i32 %call
diff --git a/llvm/test/CodeGen/AArch64/tailcall-string-rvo.ll b/llvm/test/CodeGen/AArch64/tailcall-string-rvo.ll
index d9d2180..ac9ce4d1 100644
--- a/llvm/test/CodeGen/AArch64/tailcall-string-rvo.ll
+++ b/llvm/test/CodeGen/AArch64/tailcall-string-rvo.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -relocation-model=static -verify-machineinstrs -O2 < %s | FileCheck %s
+; RUN: llc -relocation-model=static -verify-machineinstrs -global-isel -O2 < %s | FileCheck %s
 
 ; The call to function TestBar should be a tail call, when in C++ the string
 ; `ret` is RVO returned.
diff --git a/llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll b/llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll
index 8b7f979..090ab37 100644
--- a/llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll
+++ b/llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mcpu=cyclone -debug-only=machine-scheduler < %s 2>&1 | FileCheck %s
+; RUN: llc -mcpu=cyclone -debug-only=machine-scheduler < %s 2>&1 | FileCheck %s --check-prefixes=COMMON,SDAG
+; RUN: llc -mcpu=cyclone -global-isel -debug-only=machine-scheduler < %s 2>&1 | FileCheck %s --check-prefixes=COMMON,GISEL
 
 ; REQUIRES: asserts
 
@@ -20,25 +21,34 @@ declare void @callee2(i8*, i8*, i8*, i8*, i8*,
 ; PR23459 has a test case that we where miscompiling because of this at the
 ; time.
 
-; CHECK: Frame Objects
-; CHECK: fi#-4: {{.*}} fixed, at location [SP+8]
-; CHECK: fi#-3: {{.*}} fixed, at location [SP]
-; CHECK: fi#-2: {{.*}} fixed, at location [SP+8]
-; CHECK: fi#-1: {{.*}} fixed, at location [SP]
-
-; CHECK: [[VRA:%.*]]:gpr64 = LDRXui %fixed-stack.3
-; CHECK: [[VRB:%.*]]:gpr64 = LDRXui %fixed-stack.2
-; CHECK: STRXui %{{.*}}, %fixed-stack.0
-; CHECK: STRXui [[VRB]]{{[^,]*}}, %fixed-stack.1
+; COMMON: Frame Objects
+; COMMON: fi#-4: {{.*}} fixed, at location [SP+8]
+; COMMON: fi#-3: {{.*}} fixed, at location [SP]
+; COMMON: fi#-2: {{.*}} fixed, at location [SP+8]
+; COMMON: fi#-1: {{.*}} fixed, at location [SP]
+
+; The order that these appear in differs between GISel and SDAG, but the
+; dependency relationship still holds.
+; COMMON: [[VRA:%.*]]:gpr64 = LDRXui %fixed-stack.3
+; COMMON: [[VRB:%.*]]:gpr64 = LDRXui %fixed-stack.2
+; SDAG: STRXui %{{.*}}, %fixed-stack.0
+; SDAG: STRXui [[VRB]]{{[^,]*}}, %fixed-stack.1
+; GISEL: STRXui [[VRB]]{{[^,]*}}, %fixed-stack.1
+; GISEL: STRXui %{{.*}}, %fixed-stack.0
 
 ; Make sure that there is an dependence edge between fi#-2 and fi#-4.
 ; Without this edge the scheduler would be free to move the store accross the load.
 
-; CHECK: SU({{.*}}): [[VRB]]:gpr64 = LDRXui %fixed-stack.2
-; CHECK-NOT: SU
-; CHECK: Successors:
-; CHECK: SU([[DEPSTOREB:.*]]): Ord Latency=0
-; CHECK: SU([[DEPSTOREA:.*]]): Ord Latency=0
+; COMMON: SU({{.*}}): [[VRB]]:gpr64 = LDRXui %fixed-stack.2
+; COMMON-NOT: SU
+; COMMON: Successors:
+; COMMON: SU([[DEPSTOREB:.*]]): Ord Latency=0
+; COMMON: SU([[DEPSTOREA:.*]]): Ord Latency=0
+
+; GlobalISel outputs DEPSTOREB before DEPSTOREA, but the dependency relationship
+; still holds.
+; SDAG: SU([[DEPSTOREA]]): STRXui %{{.*}}, %fixed-stack.0
+; SDAG: SU([[DEPSTOREB]]): STRXui %{{.*}}, %fixed-stack.1
 
-; CHECK: SU([[DEPSTOREA]]): STRXui %{{.*}}, %fixed-stack.0
-; CHECK: SU([[DEPSTOREB]]): STRXui %{{.*}}, %fixed-stack.1
+; GISEL: SU([[DEPSTOREB]]): STRXui %{{.*}}, %fixed-stack.0
+; GISEL: SU([[DEPSTOREA]]): STRXui %{{.*}}, %fixed-stack.1
-- 
2.7.4
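
For illustration (an editor's sketch, not part of the patch): under AAPCS64 the first eight integer arguments travel in x0-x7 and the ninth spills to the stack, so the hypothetical @caller/@callee pair below needs exactly one outgoing stack slot. That slot fits in @caller's own incoming stack argument area, which `lowerFormalArguments` records via `setBytesInStackArgArea`, so the new eligibility check passes and the call can lower to a TCRETURNdi with the stack argument written through a G_FRAME_INDEX.

```
declare void @callee(i64, i64, i64, i64, i64, i64, i64, i64, i64)

; The ninth argument %s arrives on @caller's stack; forwarding it to @callee
; reuses that same one-slot stack argument area, so the sibling call fits.
define void @caller(i64 %a, i64 %b, i64 %c, i64 %d,
                    i64 %e, i64 %f, i64 %g, i64 %h, i64 %s) {
entry:
  tail call void @callee(i64 %a, i64 %b, i64 %c, i64 %d,
                         i64 %e, i64 %f, i64 %g, i64 %h, i64 %s)
  ret void
}
```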