From 747e5cfb9f5d944b47fe014925b0d5dc2fda74d7 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Tue, 12 Jan 2021 13:12:40 +0000
Subject: [PATCH] X86: support Swift Async context

This adds support to the X86 backend for the newly committed swiftasync
function parameter. If such a (pointer) parameter is present it gets stored
into an augmented frame record (populated in IR, but generally containing an
enhanced backtrace for coroutines using lots of tail calls back and forth).

The context frame is identical to AArch64 (primarily so that unwinders etc.
don't get extra complexity). Specifically, the new frame record is
[AsyncCtx, %rbp, ReturnAddr], and its presence is signalled by bit 60 of the
stored %rbp being set to 1. %rbp still points to the frame pointer in memory
for backwards compatibility (only partial on x86, but OTOH the weird AsyncCtx
before the rest of the record is because of x86).
---
 llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp |   8 ++
 llvm/lib/Target/X86/X86FastISel.cpp                |   1 +
 llvm/lib/Target/X86/X86FrameLowering.cpp           | 100 +++++++++++++++++--
 llvm/lib/Target/X86/X86ISelLowering.cpp            |  36 ++++++-
 llvm/lib/Target/X86/X86MachineFunctionInfo.h       |  15 +++
 llvm/test/CodeGen/X86/swift-async-reg.ll           |  17 ++++
 llvm/test/CodeGen/X86/swift-async.ll               | 111 +++++++++++++++++++++
 7 files changed, 279 insertions(+), 9 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/swift-async-reg.ll
 create mode 100644 llvm/test/CodeGen/X86/swift-async.ll

diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 628b8f5..7ba90f6 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -1454,6 +1454,7 @@ public:
     unsigned StackAdjust = 0;
     unsigned StackSize = 0;
     unsigned NumDefCFAOffsets = 0;
+    int MinAbsOffset = std::numeric_limits<int>::max();
 
     for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
       const MCCFIInstruction &Inst = Instrs[i];
@@ -1482,6 +1483,7 @@ public:
         memset(SavedRegs, 0, sizeof(SavedRegs));
         StackAdjust = 0;
         SavedRegIdx = 0;
+        MinAbsOffset = std::numeric_limits<int>::max();
         InstrOffset += MoveInstrSize;
         break;
       }
@@ -1525,6 +1527,7 @@ public:
         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
         SavedRegs[SavedRegIdx++] = Reg;
         StackAdjust += OffsetSize;
+        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
         InstrOffset += PushInstrSize(Reg);
         break;
       }
@@ -1538,6 +1541,11 @@ public:
       // Offset was too big for a compact unwind encoding.
       return CU::UNWIND_MODE_DWARF;
 
+    // We don't attempt to track a real StackAdjust, so if the saved registers
+    // aren't adjacent to rbp we can't cope.
+    if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
+      return CU::UNWIND_MODE_DWARF;
+
     // Get the encoding of the saved registers when we have a frame pointer.
     uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
     if (RegEnc == ~0U)
       return CU::UNWIND_MODE_DWARF;
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index cf8d5d6..8a6b9e7 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3068,6 +3068,7 @@ bool X86FastISel::fastLowerArguments() {
         Arg.hasAttribute(Attribute::InReg) ||
         Arg.hasAttribute(Attribute::StructRet) ||
         Arg.hasAttribute(Attribute::SwiftSelf) ||
+        Arg.hasAttribute(Attribute::SwiftAsync) ||
         Arg.hasAttribute(Attribute::SwiftError) ||
         Arg.hasAttribute(Attribute::Nest))
       return false;
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 3c09366..8ddcd52 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -409,7 +409,12 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
     return 0;
 
   PI = MBB.erase(PI);
-  if (PI != MBB.end() && PI->isCFIInstruction()) PI = MBB.erase(PI);
+  if (PI != MBB.end() && PI->isCFIInstruction()) {
+    auto CIs = MBB.getParent()->getFrameInstructions();
+    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
+    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset)
+      PI = MBB.erase(PI);
+  }
   if (!doMergeWithPrevious)
     MBBI = skipDebugInstructionsForward(PI, MBB.end());
 
@@ -1356,6 +1361,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       STI.getTargetLowering()->hasStackProbeSymbol(MF);
   unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
 
+  if (HasFP && X86FI->hasSwiftAsyncContext()) {
+    BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8),
+            MachineFramePtr)
+        .addUse(MachineFramePtr)
+        .addImm(60)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
   // Re-align the stack on 64-bit if the x86-interrupt calling convention is
   // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
   // stack alignment.
@@ -1470,11 +1483,43 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
 
   if (!IsWin64Prologue && !IsFunclet) {
     // Update EBP with the new base value.
-    BuildMI(MBB, MBBI, DL,
-            TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
-            FramePtr)
-        .addReg(StackPtr)
-        .setMIFlag(MachineInstr::FrameSetup);
+    if (!X86FI->hasSwiftAsyncContext()) {
+      BuildMI(MBB, MBBI, DL,
+              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
+              FramePtr)
+          .addReg(StackPtr)
+          .setMIFlag(MachineInstr::FrameSetup);
+    } else {
+      // Before we update the live frame pointer we have to ensure there's a
+      // valid (or null) asynchronous context in its slot just before FP in
+      // the frame record, so store it now.
+      const auto &Attrs = MF.getFunction().getAttributes();
+
+      if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
+        // We have an initial context in r14, store it just before the frame
+        // pointer.
+        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+            .addReg(X86::R14)
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        // No initial context, store null so that there's no pointer that
+        // could be misused.
+        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8))
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
+      BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
+          .addUse(X86::RSP)
+          .addImm(1)
+          .addUse(X86::NoRegister)
+          .addImm(8)
+          .addUse(X86::NoRegister)
+          .setMIFlag(MachineInstr::FrameSetup);
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
+          .addUse(X86::RSP)
+          .addImm(8)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
 
     if (NeedsDwarfCFI) {
       // Mark effective beginning of when frame pointer becomes valid.
@@ -1979,10 +2024,26 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   // AfterPop is the position to insert .cfi_restore.
   MachineBasicBlock::iterator AfterPop = MBBI;
   if (HasFP) {
+    if (X86FI->hasSwiftAsyncContext()) {
+      // Discard the context.
+      int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
+      emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
+    }
     // Pop EBP.
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
             MachineFramePtr)
         .setMIFlag(MachineInstr::FrameDestroy);
+
+    // We need to reset FP to its untagged state on return. Bit 60 is currently
+    // used to show the presence of an extended frame.
+    if (X86FI->hasSwiftAsyncContext()) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
+              MachineFramePtr)
+          .addUse(MachineFramePtr)
+          .addImm(60)
+          .setMIFlag(MachineInstr::FrameDestroy);
+    }
+
     if (NeedsDwarfCFI) {
       unsigned DwarfStackPtr =
           TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
@@ -2007,7 +2068,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
       if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
-          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)))
+          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+          (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+          (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
         break;
       FirstCSPop = PI;
     }
@@ -2039,6 +2102,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     uint64_t LEAAmount =
         IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
 
+    if (X86FI->hasSwiftAsyncContext())
+      LEAAmount -= 16;
+
     // There are only two legal forms of epilogue:
     // - add SEHAllocationSize, %rsp
     // - lea SEHAllocationSize(%FramePtr), %rsp
@@ -2367,6 +2433,14 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
     SpillSlotOffset -= SlotSize;
     MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
 
+    // The async context lives directly before the frame pointer, and we
+    // allocate a second slot to preserve stack alignment.
+    if (X86FI->hasSwiftAsyncContext()) {
+      SpillSlotOffset -= SlotSize;
+      MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+      SpillSlotOffset -= SlotSize;
+    }
+
     // Since emitPrologue and emitEpilogue will handle spilling and restoring of
     // the frame register, we can delete it from CSI list and not have to worry
     // about avoiding it later.
@@ -3267,7 +3341,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
   assert(MBB.getParent() && "Block is not attached to a function!");
   const MachineFunction &MF = *MBB.getParent();
-  return !TRI->hasStackRealignment(MF) || !MBB.isLiveIn(X86::EFLAGS);
+  if (!MBB.isLiveIn(X86::EFLAGS))
+    return true;
+
+  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
 }
 
 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
@@ -3280,6 +3358,12 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
   if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
     return false;
 
+  // Swift async context epilogue has a BTR instruction that clobbers parts of
+  // EFLAGS.
+  const MachineFunction &MF = *MBB.getParent();
+  if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
+    return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
+
   if (canUseLEAForSPInEpilogue(*MBB.getParent()))
     return true;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 66df933..4057e78 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3747,6 +3747,20 @@ SDValue X86TargetLowering::LowerFormalArguments(
   }
 
   for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
+    if (Ins[I].Flags.isSwiftAsync()) {
+      auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+      if (Subtarget.is64Bit())
+        X86FI->setHasSwiftAsyncContext(true);
+      else {
+        int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
+        X86FI->setSwiftAsyncContextFrameIdx(FI);
+        SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
+                                  DAG.getFrameIndex(FI, MVT::i32),
+                                  MachinePointerInfo::getFixedStack(MF, FI));
+        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
+      }
+    }
+
     // Swift calling convention does not require we copy the sret argument
     // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
     if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
@@ -25856,7 +25870,27 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     }
     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   }
-
+  case Intrinsic::swift_async_context_addr: {
+    auto &MF = DAG.getMachineFunction();
+    auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+    if (Subtarget.is64Bit()) {
+      MF.getFrameInfo().setFrameAddressIsTaken(true);
+      X86FI->setHasSwiftAsyncContext(true);
+      return SDValue(
+          DAG.getMachineNode(
+              X86::SUB64ri8, dl, MVT::i64,
+              DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64),
+              DAG.getTargetConstant(8, dl, MVT::i32)),
+          0);
+    } else {
+      // 32-bit so no special extended frame, create or reuse an existing stack
+      // slot.
+      if (!X86FI->getSwiftAsyncContextFrameIdx())
+        X86FI->setSwiftAsyncContextFrameIdx(
+            MF.getFrameInfo().CreateStackObject(4, Align(4), false));
+      return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
+    }
+  }
   case Intrinsic::x86_avx512_vp2intersect_q_512:
   case Intrinsic::x86_avx512_vp2intersect_q_256:
   case Intrinsic::x86_avx512_vp2intersect_q_128:
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index ecb86bb9..46d2e2a 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -108,6 +108,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// True if this function has any preallocated calls.
   bool HasPreallocatedCall = false;
 
+  /// Whether this function has an extended frame record [Ctx, RBP, Return
+  /// addr]. If so, bit 60 of the in-memory frame pointer will be 1 to enable
+  /// other tools to detect the extended record.
+  bool HasSwiftAsyncContext = false;
+
+  Optional<int> SwiftAsyncContextFrameIdx;
+
   ValueMap<const Value *, size_t> PreallocatedIds;
   SmallVector<size_t, 0> PreallocatedStackSizes;
   SmallVector<SmallVector<size_t, 0>, 0> PreallocatedArgOffsets;
@@ -197,6 +204,14 @@ public:
   bool hasPreallocatedCall() const { return HasPreallocatedCall; }
   void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
 
+  bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; }
+  void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; }
+
+  Optional<int> getSwiftAsyncContextFrameIdx() const {
+    return SwiftAsyncContextFrameIdx;
+  }
+  void setSwiftAsyncContextFrameIdx(int v) { SwiftAsyncContextFrameIdx = v; }
+
   size_t getPreallocatedIdForCallSite(const Value *CS) {
     auto Insert = PreallocatedIds.insert({CS, PreallocatedIds.size()});
     if (Insert.second) {
diff --git a/llvm/test/CodeGen/X86/swift-async-reg.ll b/llvm/test/CodeGen/X86/swift-async-reg.ll
new file mode 100644
index 0000000..59b41cc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/swift-async-reg.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - -fast-isel | FileCheck %s
+
+define i8* @argument(i8* swiftasync %in) {
+; CHECK-LABEL: argument:
+; CHECK: movq %r14, %rax
+
+  ret i8* %in
+}
+
+define void @call(i8* %in) {
+; CHECK-LABEL: call:
+; CHECK: movq %rdi, %r14
+
+  call i8* @argument(i8* swiftasync %in)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/swift-async.ll b/llvm/test/CodeGen/X86/swift-async.ll
new file mode 100644
index 0000000..9716fe7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/swift-async.ll
@@ -0,0 +1,111 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+; RUN: llc -mtriple=i686-apple-darwin %s -o - | FileCheck %s --check-prefix=CHECK-32
+
+
+define void @simple(i8* swiftasync %ctx) "frame-pointer"="all" {
+; CHECK-LABEL: simple:
+; CHECK: btsq $60, %rbp
+; CHECK: pushq %rbp
+; CHECK: pushq %r14
+; CHECK: leaq 8(%rsp), %rbp
+; CHECK: pushq
+; [...]
+
+; CHECK: addq $16, %rsp
+; CHECK: popq %rbp
+; CHECK: btrq $60, %rbp
+; CHECK: retq
+
+; CHECK-32-LABEL: simple:
+; CHECK-32: movl 8(%ebp), [[TMP:%.*]]
+; CHECK-32: movl [[TMP]], {{.*}}(%ebp)
+
+  ret void
+}
+
+define void @more_csrs(i8* swiftasync %ctx) "frame-pointer"="all" {
+; CHECK-LABEL: more_csrs:
+; CHECK: btsq $60, %rbp
+; CHECK: pushq %rbp
+; CHECK: .cfi_offset %rbp, -16
+; CHECK: pushq %r14
+; CHECK: leaq 8(%rsp), %rbp
+; CHECK: subq $8, %rsp
+; CHECK: pushq %r15
+; CHECK: .cfi_offset %r15, -40
+
+; [...]
+
+; CHECK: popq %r15
+; CHECK: addq $16, %rsp
+; CHECK: popq %rbp
+; CHECK: btrq $60, %rbp
+; CHECK: retq
+  call void asm sideeffect "", "~{r15}"()
+  ret void
+}
+
+define void @locals(i8* swiftasync %ctx) "frame-pointer"="all" {
+; CHECK-LABEL: locals:
+; CHECK: btsq $60, %rbp
+; CHECK: pushq %rbp
+; CHECK: .cfi_def_cfa_offset 16
+; CHECK: .cfi_offset %rbp, -16
+; CHECK: pushq %r14
+; CHECK: leaq 8(%rsp), %rbp
+; CHECK: .cfi_def_cfa_register %rbp
+; CHECK: subq $56, %rsp
+
+; CHECK: leaq -48(%rbp), %rdi
+; CHECK: callq _bar
+
+; CHECK: addq $48, %rsp
+; CHECK: addq $16, %rsp
+; CHECK: popq %rbp
+; CHECK: btrq $60, %rbp
+; CHECK: retq
+
+  %var = alloca i32, i32 10
+  call void @bar(i32* %var)
+  ret void
+}
+
+define void @use_input_context(i8* swiftasync %ctx, i8** %ptr) "frame-pointer"="all" {
+; CHECK-LABEL: use_input_context:
+; CHECK: movq %r14, (%rdi)
+
+  store i8* %ctx, i8** %ptr
+  ret void
+}
+
+define i8** @context_in_func() "frame-pointer"="non-leaf" {
+; CHECK-LABEL: context_in_func:
+; CHECK: leaq -8(%rbp), %rax
+
+; CHECK-32-LABEL: context_in_func
+; CHECK-32: movl %esp, %eax
+
+  %ptr = call i8** @llvm.swift.async.context.addr()
+  ret i8** %ptr
+}
+
+define void @write_frame_context(i8* swiftasync %ctx, i8* %newctx) "frame-pointer"="non-leaf" {
+; CHECK-LABEL: write_frame_context:
+; CHECK: movq %rbp, [[TMP:%.*]]
+; CHECK: subq $8, [[TMP]]
+; CHECK: movq %rdi, ([[TMP]])
+
+  %ptr = call i8** @llvm.swift.async.context.addr()
+  store i8* %newctx, i8** %ptr
+  ret void
+}
+
+define void @simple_fp_elim(i8* swiftasync %ctx) "frame-pointer"="non-leaf" {
+; CHECK-LABEL: simple_fp_elim:
+; CHECK-NOT: btsq
+
+  ret void
+}
+
+declare void @bar(i32*)
+declare i8** @llvm.swift.async.context.addr()
-- 
2.7.4
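Note (not part of the patch): for readers unfamiliar with the tagging scheme, here is a minimal C++ sketch of how a stack walker might consume the extended frame record described in the commit message. The struct, constant, and function names below are illustrative assumptions, not LLVM or Swift runtime APIs.

#include <cstdint>

namespace {

// Extended frame record as laid out by this patch, viewed from the frame
// pointer: the async context sits at fp - 8, the saved %rbp at fp, and the
// return address at fp + 8, i.e. [AsyncCtx, %rbp, ReturnAddr].
struct AsyncFrameRecord {
  void *AsyncCtx;
  uint64_t SavedFP;   // may have bit 60 set to mark the extended record
  void *ReturnAddr;
};

constexpr uint64_t SwiftAsyncFrameBit = uint64_t(1) << 60;

// Given a saved frame pointer loaded from the stack, report whether the
// frame that stored it used the extended record, and return the value with
// the tag bit cleared so an ordinary frame-pointer walk can continue.
inline uint64_t stripAsyncFrameTag(uint64_t SavedFP, bool &IsAsyncFrame) {
  IsAsyncFrame = (SavedFP & SwiftAsyncFrameBit) != 0;
  return SavedFP & ~SwiftAsyncFrameBit;
}

} // namespace

Because %rbp keeps pointing at the saved-%rbp slot, existing frame walkers keep working; only tools that want the async context need to check the bit and read the extra slot below it.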