From 63eb7ff47de5df48b6bc0cf0a6d3d17022634151 Mon Sep 17 00:00:00 2001 From: Ties Stuij Date: Tue, 7 Dec 2021 10:13:17 +0000 Subject: [PATCH] [ARM] Implement PAC return address signing mechanism for PACBTI-M This patch implements PAC return address signing for armv8-m. This patch roughly accomplishes the following things: - PAC and AUT instructions are generated. - They're part of the stack frame setup, so that shrink-wrapping can move them inwards to cover only part of a function - The auth code generated by PAC is saved across subroutine calls so that AUT can find it again to check - PAC is emitted before stacking registers (so that the SP it signs is the one on function entry). - The new pseudo-register ra_auth_code is mentioned in the DWARF frame data - With CMSE also in use: PAC is emitted before stacking FPCXTNS, and AUT validates the corresponding value of SP - Emit correct unwind information when PAC is replaced by PACBTI - Handle tail calls correctly Some notes: We make the assembler accept the `.save {ra_auth_code}` directive that is emitted by the compiler when it saves a register that contains a return address authentication code. For EHABI we need to have the `FrameSetup` flag on the instruction and handle the `t2PACBTI` opcode (identically to `t2PAC`), so we can emit `.save {ra_auth_code}`, instead of `.save {r12}`. For PACBTI-M, the instruction which computes return address PAC should use SP value before adjustment for the argument registers save area (used for variadic functions and when a parameter is split between stack and register), but at the same time it should be after the instruction that saves FPCXT when compiling a CMSE entry function. This patch moves the varargs SP adjustment after the FPCXT save (they are never enabled at the same time), so in a following patch handling of the `PAC` instruction can be placed between them. Epilogue emission code is adjusted in a similar manner. 
PACBTI-M code generation should not emit any instructions for architectures v6-m, v8-m.base, and for A- and R-class cores. Diagnostic message for such cases is handled separately by a future ticket. Note on tail calls: If the called function has four arguments that occupy registers `r0`-`r3`, the only option for holding the function pointer itself is `r12`, but this register is used to keep the PAC during function prologue/epilogue and clobbers the function pointer. When we do the tail call we need the five registers (`r0`-`r3` and `r12`) to keep six values - the four function arguments, the function pointer and the PAC, which is obviously impossible. One option would be to authenticate the return address before all callee-saved registers are restored, so we have a scratch register to temporarily keep the value of `r12`. The issue with this approach is that it violates a fundamental invariant that PAC is computed using the CFA as a modifier. It would also mean using separate instructions to pop `lr` and the rest of the callee-saved registers, which would offset the advantages of doing a tail call. Instead, this patch disables indirect tail calls when the called function takes four or more arguments and the return address sign and authentication is enabled for the caller function, conservatively assuming the caller function would spill LR. 
This patch is part of a series that adds support for the PACBTI-M extension of the Armv8.1-M architecture, as detailed here: https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/armv8-1-m-pointer-authentication-and-branch-target-identification-extension The PACBTI-M specification can be found in the Armv8-M Architecture Reference Manual: https://developer.arm.com/documentation/ddi0553/latest The following people contributed to this patch: - Momchil Velikov - Ties Stuij Reviewed By: danielkiss Differential Revision: https://reviews.llvm.org/D112429 --- llvm/include/llvm/Support/ARMEHABI.h | 4 + llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 36 +++- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 213 ++++++++++++++------ llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 27 ++- llvm/lib/Target/ARM/ARMBranchTargets.cpp | 5 +- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 8 + llvm/lib/Target/ARM/ARMFrameLowering.cpp | 94 ++++++--- llvm/lib/Target/ARM/ARMISelLowering.cpp | 14 +- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 2 +- llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 2 +- llvm/lib/Target/ARM/ARMRegisterInfo.td | 2 + llvm/lib/Target/ARM/ARMSubtarget.h | 3 + llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 31 ++- .../lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 67 +++++-- .../lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp | 5 +- .../CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll | 3 +- llvm/test/CodeGen/ARM/ipra-reg-usage.ll | 2 +- llvm/test/CodeGen/ARM/machine-outliner-calls.mir | 16 +- llvm/test/CodeGen/ARM/machine-outliner-default.mir | 24 +-- .../CodeGen/ARM/machine-outliner-lr-regsave.mir | 8 +- .../ARM/machine-outliner-stack-fixup-arm.mir | 16 +- .../ARM/machine-outliner-stack-fixup-thumb.mir | 20 +- llvm/test/CodeGen/ARM/va_arg.ll | 18 +- llvm/test/CodeGen/ARM/vargs_align.ll | 3 +- llvm/test/CodeGen/Thumb2/bti-pac-replace-1.mir | 96 +++++++++ llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll | 43 ++++ llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll 
| 124 ++++++++++++ .../CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll | 37 ++++ llvm/test/CodeGen/Thumb2/pacbti-m-outliner-1.ll | 144 ++++++++++++++ llvm/test/CodeGen/Thumb2/pacbti-m-outliner-2.ll | 89 +++++++++ llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll | 166 ++++++++++++++++ llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll | 219 +++++++++++++++++++++ llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll | 98 +++++++++ llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll | 71 +++++++ .../CodeGen/Thumb2/pacbti-m-unsupported-arch.ll | 31 +++ llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll | 77 ++++++++ llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll | 101 ++++++++++ llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll | 125 ++++++++++++ llvm/test/MC/ARM/ra-auth-code-errors.s | 36 ++++ llvm/test/MC/ARM/ra-auth-code.s | 24 +++ .../test/MC/Disassembler/ARM/sub-sp-imm-thumb2.txt | 12 +- llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s | 6 +- llvm/tools/llvm-readobj/ARMEHABIPrinter.h | 8 +- 43 files changed, 1918 insertions(+), 212 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/bti-pac-replace-1.mir create mode 100644 llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-outliner-1.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-outliner-2.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-unsupported-arch.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll create mode 100644 
llvm/test/MC/ARM/ra-auth-code-errors.s create mode 100644 llvm/test/MC/ARM/ra-auth-code.s diff --git a/llvm/include/llvm/Support/ARMEHABI.h b/llvm/include/llvm/Support/ARMEHABI.h index 3fbb56d..1a7778f 100644 --- a/llvm/include/llvm/Support/ARMEHABI.h +++ b/llvm/include/llvm/Support/ARMEHABI.h @@ -71,6 +71,10 @@ namespace EHABI { // Purpose: finish UNWIND_OPCODE_FINISH = 0xb0, + // Format: 10110100 + // Purpose: Pop Return Address Authetication Code + UNWIND_OPCODE_POP_RA_AUTH_CODE = 0xb4, + // Format: 10110001 0000xxxx // Purpose: pop r[3:0] // Constraint: x != 0 diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 6a88ac4..fa09b25 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1153,8 +1153,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { unsigned StartOp = 2 + 2; // Use all the operands. unsigned NumOffset = 0; - // Amount of SP adjustment folded into a push. - unsigned Pad = 0; + // Amount of SP adjustment folded into a push, before the + // registers are stored (pad at higher addresses). + unsigned PadBefore = 0; + // Amount of SP adjustment folded into a push, after the + // registers are stored (pad at lower addresses). 
+ unsigned PadAfter = 0; switch (Opc) { default: @@ -1185,7 +1189,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { "Pad registers must come before restored ones"); unsigned Width = TargetRegInfo->getRegSizeInBits(MO.getReg(), MachineRegInfo) / 8; - Pad += Width; + PadAfter += Width; continue; } // Check for registers that are remapped (for a Thumb1 prologue that @@ -1201,14 +1205,32 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { case ARM::t2STR_PRE: assert(MI->getOperand(2).getReg() == ARM::SP && "Only stack pointer as a source reg is supported"); + if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(SrcReg)) + SrcReg = RemappedReg; + + RegList.push_back(SrcReg); + break; + case ARM::t2STRD_PRE: + assert(MI->getOperand(3).getReg() == ARM::SP && + "Only stack pointer as a source reg is supported"); + SrcReg = MI->getOperand(1).getReg(); + if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(SrcReg)) + SrcReg = RemappedReg; + RegList.push_back(SrcReg); + SrcReg = MI->getOperand(2).getReg(); + if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(SrcReg)) + SrcReg = RemappedReg; RegList.push_back(SrcReg); + PadBefore = -MI->getOperand(4).getImm() - 8; break; } if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) { + if (PadBefore) + ATS.emitPad(PadBefore); ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); // Account for the SP adjustment, folded into the push. - if (Pad) - ATS.emitPad(Pad); + if (PadAfter) + ATS.emitPad(PadAfter); } } else { // Changes of stack / frame pointer. 
@@ -1300,6 +1322,10 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { Offset = MI->getOperand(2).getImm(); AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16); break; + case ARM::t2PAC: + case ARM::t2PACBTI: + AFI->EHPrologueRemappedRegs[ARM::R12] = ARM::RA_AUTH_CODE; + break; default: MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 2a12947..1f7f425 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5678,7 +5678,7 @@ bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, /// | | Thumb2 | ARM | /// +-------------------------+--------+-----+ /// | Call overhead in Bytes | 4 | 4 | -/// | Frame overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 2 | 4 | /// | Stack fixup required | No | No | /// +-------------------------+--------+-----+ /// @@ -5755,7 +5755,7 @@ struct OutlinerCosts { CallThunk(target.isThumb() ? 4 : 4), FrameThunk(target.isThumb() ? 0 : 0), CallNoLRSave(target.isThumb() ? 4 : 4), - FrameNoLRSave(target.isThumb() ? 4 : 4), + FrameNoLRSave(target.isThumb() ? 2 : 4), CallRegSave(target.isThumb() ? 8 : 12), FrameRegSave(target.isThumb() ? 2 : 4), CallDefault(target.isThumb() ? 8 : 12), @@ -5868,11 +5868,17 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( return outliner::OutlinedFunction(); } + // We expect the majority of the outlining candidates to be in consensus with + // regard to return address sign and authentication, and branch target + // enforcement, in other words, partitioning according to all the four + // possible combinations of PAC-RET and BTI is going to yield one big subset + // and three small (likely empty) subsets. That allows us to cull incompatible + // candidates separately for PAC-RET and BTI. 
+ // Partition the candidates in two sets: one with BTI enabled and one with BTI - // disabled. Remove the candidates from the smaller set. We expect the - // majority of the candidates to be in consensus with regard to branch target - // enforcement with just a few oddballs, but if they are the same number - // prefer the non-BTI ones for outlining, since they have less overhead. + // disabled. Remove the candidates from the smaller set. If they are the same + // number prefer the non-BTI ones for outlining, since they have less + // overhead. auto NoBTI = llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) { const ARMFunctionInfo &AFI = *C.getMF()->getInfo(); @@ -5883,6 +5889,24 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end()); else RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI); + + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + + // Likewise, partition the candidates according to PAC-RET enablement. + auto NoPAC = + llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) { + const ARMFunctionInfo &AFI = *C.getMF()->getInfo(); + // If the function happens to not spill the LR, do not disqualify it + // from the outlining. + return AFI.shouldSignReturnAddress(true); + }); + if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) > + std::distance(NoPAC, RepeatedSequenceLocs.end())) + RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end()); + else + RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC); + if (RepeatedSequenceLocs.size() < 2) return outliner::OutlinedFunction(); @@ -5899,6 +5923,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( }; OutlinerCosts Costs(Subtarget); + const auto &SomeMFI = *RepeatedSequenceLocs.front().getMF()->getInfo(); // Adjust costs to account for the BTI instructions. 
@@ -5909,6 +5934,13 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( Costs.FrameTailCall += 4; Costs.FrameThunk += 4; } + + // Adjust costs to account for sign and authentication instructions. + if (SomeMFI.shouldSignReturnAddress(true)) { + Costs.CallDefault += 8; // +PAC instr, +AUT instr + Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr + } + unsigned FrameID = MachineOutlinerDefault; unsigned NumBytesToCreateFrame = Costs.FrameDefault; @@ -6325,6 +6357,11 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, // * LR is available in the range (No save/restore around call) // * The range doesn't include calls (No save/restore in outlined frame) // are true. + // These conditions also ensure correctness of the return address + // authentication - we insert sign and authentication instructions only if + // we save/restore LR on stack, but then this condition ensures that the + // outlined range does not modify the SP, therefore the SP value used for + // signing is the same as the one used for authentication. // FIXME: This is very restrictive; the flags check the whole block, // not just the bit we will try to outline. bool MightNeedStackFixUp = @@ -6369,23 +6406,39 @@ void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { } void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const { - unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; - int Align = -Subtarget.getStackAlignment().value(); - BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP) - .addReg(ARM::LR, RegState::Kill) - .addReg(ARM::SP) - .addImm(Align) - .add(predOps(ARMCC::AL)); -} + MachineBasicBlock::iterator It, bool CFI, + bool Auth) const { + int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8)); + assert(Align >= 8 && Align <= 256); + if (Auth) { + assert(Subtarget.isThumb2()); + // Compute PAC in R12. 
Outlining ensures R12 is dead across the outlined + // sequence. + BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)) + .setMIFlags(MachineInstr::FrameSetup); + BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP) + .addReg(ARM::R12, RegState::Kill) + .addReg(ARM::LR, RegState::Kill) + .addReg(ARM::SP) + .addImm(-Align) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameSetup); + } else { + unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; + BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP) + .addReg(ARM::LR, RegState::Kill) + .addReg(ARM::SP) + .addImm(-Align) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameSetup); + } + + if (!CFI) + return; -void ARMBaseInstrInfo::emitCFIForLRSaveOnStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { MachineFunction &MF = *MBB.getParent(); - const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); - unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); - int Align = Subtarget.getStackAlignment().value(); - // Add a CFI saying the stack was moved down. + + // Add a CFI, saying CFA is offset by Align bytes from SP. int64_t StackPosEntry = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align)); BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) @@ -6394,11 +6447,23 @@ void ARMBaseInstrInfo::emitCFIForLRSaveOnStack( // Add a CFI saying that the LR that we want to find is now higher than // before. - int64_t LRPosEntry = - MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfLR, -Align)); + int LROffset = Auth ? Align - 4 : Align; + const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); + unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); + int64_t LRPosEntry = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset)); BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) .addCFIIndex(LRPosEntry) .setMIFlags(MachineInstr::FrameSetup); + if (Auth) { + // Add a CFI for the location of the return adddress PAC. 
+ unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true); + int64_t RACPosEntry = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(RACPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + } } void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB, @@ -6416,35 +6481,64 @@ void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB, .setMIFlags(MachineInstr::FrameSetup); } -void ARMBaseInstrInfo::restoreLRFromStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { - unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP); - if (!Subtarget.isThumb()) - MIB.addReg(0); - MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL)); -} +void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It, + bool CFI, bool Auth) const { + int Align = Subtarget.getStackAlignment().value(); + if (Auth) { + assert(Subtarget.isThumb2()); + // Restore return address PAC and LR. + BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST)) + .addReg(ARM::R12, RegState::Define) + .addReg(ARM::LR, RegState::Define) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .addImm(Align) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameDestroy); + // LR authentication is after the CFI instructions, below. + } else { + unsigned Opc = Subtarget.isThumb() ? 
ARM::t2LDR_POST : ARM::LDR_POST_IMM; + MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP); + if (!Subtarget.isThumb()) + MIB.addReg(0); + MIB.addImm(Subtarget.getStackAlignment().value()) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameDestroy); + } -void ARMBaseInstrInfo::emitCFIForLRRestoreFromStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { - // Now stack has moved back up... - MachineFunction &MF = *MBB.getParent(); - const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); - unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); - int64_t StackPosEntry = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); - BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) - .addCFIIndex(StackPosEntry) - .setMIFlags(MachineInstr::FrameDestroy); + if (CFI) { + // Now stack has moved back up... + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); + unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); + int64_t StackPosEntry = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(StackPosEntry) + .setMIFlags(MachineInstr::FrameDestroy); + + // ... and we have restored LR. + int64_t LRPosEntry = + MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameDestroy); + + if (Auth) { + unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true); + int64_t Entry = + MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, DwarfRAC)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(Entry) + .setMIFlags(MachineInstr::FrameDestroy); + } + } - // ... and we have restored LR. 
- int64_t LRPosEntry = - MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR)); - BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) - .addCFIIndex(LRPosEntry) - .setMIFlags(MachineInstr::FrameDestroy); + if (Auth) + BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT)); } void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg( @@ -6500,8 +6594,11 @@ void ARMBaseInstrInfo::buildOutlinedFrame( MBB.addLiveIn(ARM::LR); // Insert a save before the outlined region - saveLROnStack(MBB, It); - emitCFIForLRSaveOnStack(MBB, It); + bool Auth = OF.Candidates.front() + .getMF() + ->getInfo() + ->shouldSignReturnAddress(true); + saveLROnStack(MBB, It, true, Auth); // Fix up the instructions in the range, since we're going to modify the // stack. @@ -6510,8 +6607,7 @@ void ARMBaseInstrInfo::buildOutlinedFrame( fixupPostOutline(MBB); // Insert a restore before the terminator for the function. Restore LR. - restoreLRFromStack(MBB, Et); - emitCFIForLRRestoreFromStack(MBB, Et); + restoreLRFromStack(MBB, Et, true, Auth); } // If this is a tail call outlined function, then there's already a return. @@ -6590,13 +6686,10 @@ MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( // We have the default case. Save and restore from SP. 
if (!MBB.isLiveIn(ARM::LR)) MBB.addLiveIn(ARM::LR); - saveLROnStack(MBB, It); - if (!AFI.isLRSpilled()) - emitCFIForLRSaveOnStack(MBB, It); + bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true); + saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth); CallPt = MBB.insert(It, CallMIB); - restoreLRFromStack(MBB, It); - if (!AFI.isLRSpilled()) - emitCFIForLRRestoreFromStack(MBB, It); + restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth); It--; return CallPt; } diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 5fa912a..defce07 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -377,20 +377,20 @@ private: /// constructing an outlined call if one exists. Returns 0 otherwise. unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; - // Adds an instruction which saves the link register on top of the stack into - /// the MachineBasicBlock \p MBB at position \p It. - void saveLROnStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; + /// Adds an instruction which saves the link register on top of the stack into + /// the MachineBasicBlock \p MBB at position \p It. If \p Auth is true, + /// compute and store an authentication code alongiside the link register. + /// If \p CFI is true, emit CFI instructions. + void saveLROnStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator It, + bool CFI, bool Auth) const; /// Adds an instruction which restores the link register from the top the - /// stack into the MachineBasicBlock \p MBB at position \p It. + /// stack into the MachineBasicBlock \p MBB at position \p It. If \p Auth is + /// true, restore an authentication code and authenticate LR. + /// If \p CFI is true, emit CFI instructions. 
void restoreLRFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; - - /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, - /// for the case when the LR is saved on the stack. - void emitCFIForLRSaveOnStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; + MachineBasicBlock::iterator It, bool CFI, + bool Auth) const; /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, /// for the case when the LR is saved in the register \p Reg. @@ -399,11 +399,6 @@ private: Register Reg) const; /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, - /// after the LR is was restored from the stack. - void emitCFIForLRRestoreFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; - - /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, /// after the LR is was restored from a register. void emitCFIForLRRestoreFromReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const; diff --git a/llvm/lib/Target/ARM/ARMBranchTargets.cpp b/llvm/lib/Target/ARM/ARMBranchTargets.cpp index 1091c1f..8ba3e62 100644 --- a/llvm/lib/Target/ARM/ARMBranchTargets.cpp +++ b/llvm/lib/Target/ARM/ARMBranchTargets.cpp @@ -108,6 +108,7 @@ void ARMBranchTargets::addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB, bool IsFirstBB) { // Which instruction to insert: BTI or PACBTI unsigned OpCode = ARM::t2BTI; + unsigned MIFlags = 0; // Skip meta instructions, including EH labels auto MBBI = llvm::find_if_not(MBB.instrs(), [](const MachineInstr &MI) { @@ -121,6 +122,7 @@ void ARMBranchTargets::addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB, LLVM_DEBUG(dbgs() << "Removing a 'PAC' instr from BB '" << MBB.getName() << "' to replace with PACBTI\n"); OpCode = ARM::t2PACBTI; + MIFlags = MachineInstr::FrameSetup; auto NextMBBI = std::next(MBBI); MBBI->eraseFromParent(); MBBI = NextMBBI; @@ -131,5 +133,6 @@ void ARMBranchTargets::addBTI(const 
ARMInstrInfo &TII, MachineBasicBlock &MBB, << (OpCode == ARM::t2BTI ? "BTI" : "PACBTI") << "' instr into BB '" << MBB.getName() << "'\n"); // Finally, insert a new instruction (either PAC or PACBTI) - BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode)); + BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode)) + .setMIFlags(MIFlags); } diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index d6b1444..fa24478 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -2160,6 +2160,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } case ARM::tBXNS_RET: { + // For v8.0-M.Main we need to authenticate LR before clearing FPRs, which + // uses R12 as a scratch register. + if (!STI->hasV8_1MMainlineOps() && AFI->shouldSignReturnAddress()) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARM::t2AUT)); + MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI); if (STI->hasV8_1MMainlineOps()) { @@ -2169,6 +2174,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addReg(ARM::SP) .addImm(4) .add(predOps(ARMCC::AL)); + + if (AFI->shouldSignReturnAddress()) + BuildMI(AfterBB, AfterBB.end(), DebugLoc(), TII->get(ARM::t2AUT)); } // Clear all GPR that are not a use of the return instruction. diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index b866cf9..73456b2 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -503,20 +503,12 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, StackAdjustingInsts DefCFAOffsetCandidates; bool HasFP = hasFP(MF); - // Allocate the vararg register save area. 
- if (ArgRegsSaveSize) { - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, - MachineInstr::FrameSetup); - DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true); - } - if (!AFI->hasStackFrame() && (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) { - if (NumBytes - ArgRegsSaveSize != 0) { - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), + if (NumBytes != 0) { + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); - DefCFAOffsetCandidates.addInst(std::prev(MBBI), - NumBytes - ArgRegsSaveSize, true); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true); } DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); return; @@ -562,13 +554,26 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } } - // Move past FPCXT area. MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; + + // Move past the PAC computation. + if (AFI->shouldSignReturnAddress()) + LastPush = MBBI++; + + // Move past FPCXT area. if (FPCXTSaveSize > 0) { LastPush = MBBI++; DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true); } + // Allocate the vararg register save area. + if (ArgRegsSaveSize) { + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, + MachineInstr::FrameSetup); + LastPush = std::prev(MBBI); + DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true); + } + // Move past area 1. if (GPRCS1Size > 0) { GPRCS1Push = LastPush = MBBI++; @@ -788,7 +793,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R11: case ARM::R12: if (STI.splitFramePushPop(MF)) { - unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); + unsigned DwarfReg = MRI->getDwarfRegNum( + Reg == ARM::R12 ? 
ARM::RA_AUTH_CODE : Reg, true); unsigned Offset = MFI.getObjectOffset(FI); unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); @@ -923,8 +929,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); if (!AFI->hasStackFrame()) { - if (NumBytes - ReservedArgStack != 0) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ReservedArgStack, + if (NumBytes + IncomingArgStackToRestore != 0) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, + NumBytes + IncomingArgStackToRestore, MachineInstr::FrameDestroy); } else { // Unwind MBBI to point to first LDR / VLDRD. @@ -1007,15 +1014,21 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; - if (AFI->getFPCXTSaveAreaSize()) MBBI++; - } - if (ReservedArgStack || IncomingArgStackToRestore) { - assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 && - "attempting to restore negative stack amount"); - emitSPUpdate(isARM, MBB, MBBI, dl, TII, - ReservedArgStack + IncomingArgStackToRestore, - MachineInstr::FrameDestroy); + if (ReservedArgStack || IncomingArgStackToRestore) { + assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 && + "attempting to restore negative stack amount"); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, + ReservedArgStack + IncomingArgStackToRestore, + MachineInstr::FrameDestroy); + } + + // Validate PAC, It should have been already popped into R12. For CMSE entry + // function, the validation instruction is emitted during expansion of the + // tBXNS_RET, since the validation must use the value of SP at function + // entry, before saving, resp. after restoring, FPCXTNS. 
+ if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction()) + BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT)); } } @@ -1199,6 +1212,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); ARMFunctionInfo *AFI = MF.getInfo(); + bool hasPAC = AFI->shouldSignReturnAddress(); DebugLoc DL; bool isTailCall = false; bool isInterrupt = false; @@ -1231,7 +1245,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 && - STI.hasV5TOps() && MBB.succ_empty()) { + STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) { Reg = ARM::PC; // Fold the return instruction into the LDM. DeleteRet = true; @@ -1580,6 +1594,11 @@ bool ARMFrameLowering::spillCalleeSavedRegisters( ARM::t2STR_PRE : ARM::STR_PRE_IMM; unsigned FltOpc = ARM::VSTMDDB_UPD; unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); + // Compute PAC in R12. + if (AFI->shouldSignReturnAddress()) { + BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC)) + .setMIFlags(MachineInstr::FrameSetup); + } // Save the non-secure floating point context. if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) { return C.getReg() == ARM::FPCXTNS; @@ -1789,6 +1808,13 @@ bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { MF.getInfo()->isCmseNSEntryFunction()) return false; + // We are disabling shrinkwrapping for now when PAC is enabled, as + // shrinkwrapping can cause clobbering of r12 when the PAC code is + // generated. A follow-up patch will fix this in a more performant manner. 
+ if (MF.getInfo()->shouldSignReturnAddress( + false /*SpillsLR */)) + return false; + return true; } @@ -2315,6 +2341,26 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots( CSI.back().setRestored(false); } + // For functions that sign their return address, the return address PAC is + // computed in R12 upon function entry. Treat R12 as a callee-saved register + // in this case. + const auto &AFI = *MF.getInfo(); + if (AFI.shouldSignReturnAddress()) { + // The order of registers must match the order in which we push them, because + // the PEI assigns frame indices in that order. When compiling for return + // address signing and authentication, we use split push, therefore the order + // we want is: + // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8 + CSI.insert(find_if(CSI, + [=](const auto &CS) { + unsigned Reg = CS.getReg(); + return Reg == ARM::R10 || Reg == ARM::R11 || + Reg == ARM::R8 || Reg == ARM::R9 || + ARM::DPRRegClass.contains(Reg); + }), + CalleeSavedInfo(ARM::R12)); + } + return false; } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index add5b1c..3281451 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2952,9 +2952,17 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( // Indirect tail calls cannot be optimized for Thumb1 if the args // to the call take up r0-r3. The reason is that there are no legal registers // left to hold the pointer to the function to be called. - if (Subtarget->isThumb1Only() && Outs.size() >= 4 && - (!isa(Callee.getNode()) || isIndirect)) - return false; + // Similarly, if the function uses return address sign and authentication, + // r12 is needed to hold the PAC and is not available to hold the callee + // address. + if (Outs.size() >= 4 && + (!isa(Callee.getNode()) || isIndirect)) { + if (Subtarget->isThumb1Only()) + return false; + // Conservatively assume the function spills LR.
+ if (MF.getInfo()->shouldSignReturnAddress(true)) + return false; + } // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 3b10c60..e02902d 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2121,7 +2121,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { bool Modified = false; for (MachineBasicBlock &MBB : Fn) { Modified |= LoadStoreMultipleOpti(MBB); - if (STI->hasV5TOps()) + if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress()) Modified |= MergeReturnIntoLDM(MBB); if (isThumb1) Modified |= CombineMovBx(MBB); diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 4077fc0..d8d9370 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -289,7 +289,7 @@ public: return false; if (SignReturnAddressAll) return true; - return LRSpilled; + return SpillsLR; } bool branchTargetEnforcement() const { return BranchTargetEnforcement; } diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td index e7a3793..194d65c 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -211,6 +211,8 @@ def FPCXTS : ARMReg<15, "fpcxts">; def ZR : ARMReg<15, "zr">, DwarfRegNum<[15]>; +def RA_AUTH_CODE : ARMReg<12, "ra_auth_code">, DwarfRegNum<[143]>; + // Register classes. 
// // pc == Program Counter diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index cb46a6b7..e61b90a 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -18,6 +18,7 @@ #include "ARMConstantPoolValue.h" #include "ARMFrameLowering.h" #include "ARMISelLowering.h" +#include "ARMMachineFunctionInfo.h" #include "ARMSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -844,6 +845,8 @@ public: /// to lr. This is always required on Thumb1-only targets, as the push and /// pop instructions can't access the high registers. bool splitFramePushPop(const MachineFunction &MF) const { + if (MF.getInfo()->shouldSignReturnAddress()) + return true; return (getFramePointerReg() == ARM::R7 && MF.getTarget().Options.DisableFramePointerElim(MF)) || isThumb1Only(); diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 39f407b..980c441 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -452,7 +452,8 @@ class ARMAsmParser : public MCTargetAsmParser { int tryParseRegister(); bool tryParseRegisterWithWriteBack(OperandVector &); int tryParseShiftRegister(OperandVector &); - bool parseRegisterList(OperandVector &, bool EnforceOrder = true); + bool parseRegisterList(OperandVector &, bool EnforceOrder = true, + bool AllowRAAC = false); bool parseMemory(OperandVector &); bool parseOperand(OperandVector &, StringRef Mnemonic); bool parsePrefix(ARMMCExpr::VariantKind &RefKind); @@ -4464,8 +4465,8 @@ insertNoDuplicates(SmallVectorImpl> &Regs, } /// Parse a register list. 
-bool ARMAsmParser::parseRegisterList(OperandVector &Operands, - bool EnforceOrder) { +bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder, + bool AllowRAAC) { MCAsmParser &Parser = getParser(); if (Parser.getTok().isNot(AsmToken::LCurly)) return TokError("Token is not a Left Curly Brace"); @@ -4478,7 +4479,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, int Reg = tryParseRegister(); if (Reg == -1) return Error(RegLoc, "register expected"); - + if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE) + return Error(RegLoc, "pseudo-register not allowed"); // The reglist instructions have at most 16 registers, so reserve // space for that many. int EReg = 0; @@ -4492,7 +4494,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, ++Reg; } const MCRegisterClass *RC; - if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) + if (Reg == ARM::RA_AUTH_CODE || + ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) RC = &ARMMCRegisterClasses[ARM::GPRRegClassID]; else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) RC = &ARMMCRegisterClasses[ARM::DPRRegClassID]; @@ -4513,11 +4516,15 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, while (Parser.getTok().is(AsmToken::Comma) || Parser.getTok().is(AsmToken::Minus)) { if (Parser.getTok().is(AsmToken::Minus)) { + if (Reg == ARM::RA_AUTH_CODE) + return Error(RegLoc, "pseudo-register not allowed"); Parser.Lex(); // Eat the minus. SMLoc AfterMinusLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); if (EndReg == -1) return Error(AfterMinusLoc, "register expected"); + if (EndReg == ARM::RA_AUTH_CODE) + return Error(AfterMinusLoc, "pseudo-register not allowed"); // Allow Q regs and just interpret them as the two D sub-registers. 
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) EndReg = getDRegFromQReg(EndReg) + 1; @@ -4526,7 +4533,9 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, if (Reg == EndReg) continue; // The register must be in the same register class as the first. - if (!RC->contains(EndReg)) + if ((Reg == ARM::RA_AUTH_CODE && + RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) || + (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg))) return Error(AfterMinusLoc, "invalid register in register list"); // Ranges must go from low to high. if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg)) @@ -4551,13 +4560,15 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, Reg = tryParseRegister(); if (Reg == -1) return Error(RegLoc, "register expected"); + if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE) + return Error(RegLoc, "pseudo-register not allowed"); // Allow Q regs and just interpret them as the two D sub-registers. bool isQReg = false; if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); isQReg = true; } - if (!RC->contains(Reg) && + if (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg) && RC->getID() == ARMMCRegisterClasses[ARM::GPRRegClassID].getID() && ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg)) { // switch the register classes, as GPRwithAPSRnospRegClassID is a partial @@ -4577,7 +4588,9 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, continue; } // The register must be in the same register class as the first. - if (!RC->contains(Reg)) + if ((Reg == ARM::RA_AUTH_CODE && + RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) || + (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg))) return Error(RegLoc, "invalid register in register list"); // In most cases, the list must be monotonically increasing. 
An // exception is CLRM, which is order-independent anyway, so @@ -11685,7 +11698,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { SmallVector, 1> Operands; // Parse the register list - if (parseRegisterList(Operands) || + if (parseRegisterList(Operands, true, true) || parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) return true; ARMOperand &Op = (ARMOperand &)*Operands[0]; diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 896b104..e060e59 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -1289,34 +1289,65 @@ void ARMELFStreamer::emitPad(int64_t Offset) { PendingOffset -= Offset; } -void ARMELFStreamer::emitRegSave(const SmallVectorImpl &RegList, - bool IsVector) { - // Collect the registers in the register list - unsigned Count = 0; +static std::pair +collectHWRegs(const MCRegisterInfo &MRI, unsigned Idx, + const SmallVectorImpl &RegList, bool IsVector, + uint32_t &Mask_) { uint32_t Mask = 0; - const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - for (size_t i = 0; i < RegList.size(); ++i) { - unsigned Reg = MRI->getEncodingValue(RegList[i]); + unsigned Count = 0; + while (Idx > 0) { + unsigned Reg = RegList[Idx - 1]; + if (Reg == ARM::RA_AUTH_CODE) + break; + Reg = MRI.getEncodingValue(Reg); assert(Reg < (IsVector ? 32U : 16U) && "Register out of range"); unsigned Bit = (1u << Reg); if ((Mask & Bit) == 0) { Mask |= Bit; ++Count; } + --Idx; } - // Track the change the $sp offset: For the .save directive, the - // corresponding push instruction will decrease the $sp by (4 * Count). - // For the .vsave directive, the corresponding vpush instruction will - // decrease $sp by (8 * Count). - SPOffset -= Count * (IsVector ? 
8 : 4); + Mask_ = Mask; + return {Idx, Count}; +} - // Emit the opcode - FlushPendingOffset(); - if (IsVector) - UnwindOpAsm.EmitVFPRegSave(Mask); - else - UnwindOpAsm.EmitRegSave(Mask); +void ARMELFStreamer::emitRegSave(const SmallVectorImpl &RegList, + bool IsVector) { + uint32_t Mask; + unsigned Idx, Count; + const MCRegisterInfo &MRI = *getContext().getRegisterInfo(); + + // Collect the registers in the register list. Issue unwinding instructions in + // three parts: ordinary hardware registers, return address authentication + // code pseudo register, the rest of the registers. The RA PAC is kept in an + // architectural register (usually r12), but we treat it as a special case in + // order to distinguish between that register containing RA PAC and a general + // value. + Idx = RegList.size(); + while (Idx > 0) { + std::tie(Idx, Count) = collectHWRegs(MRI, Idx, RegList, IsVector, Mask); + if (Count) { + // Track the change to the $sp offset: For the .save directive, the + // corresponding push instruction will decrease the $sp by (4 * Count). + // For the .vsave directive, the corresponding vpush instruction will + // decrease $sp by (8 * Count). + SPOffset -= Count * (IsVector ? 
8 : 4); + + // Emit the opcode + FlushPendingOffset(); + if (IsVector) + UnwindOpAsm.EmitVFPRegSave(Mask); + else + UnwindOpAsm.EmitRegSave(Mask); + } else if (Idx > 0 && RegList[Idx - 1] == ARM::RA_AUTH_CODE) { + --Idx; + SPOffset -= 4; + FlushPendingOffset(); + UnwindOpAsm.EmitRegSave(0); + } + } } void ARMELFStreamer::emitUnwindRaw(int64_t Offset, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp index 781627c..50f416b 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -64,8 +64,11 @@ namespace { } // end anonymous namespace void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { - if (RegSave == 0u) + if (RegSave == 0u) { + // That's the special case for RA PAC. + EmitInt8(ARM::EHABI::UNWIND_OPCODE_POP_RA_AUTH_CODE); return; + } // One byte opcode to save register r14 and r11-r4 if (RegSave & (1u << 4)) { diff --git a/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll index f50fa8c..8e48f61 100644 --- a/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll +++ b/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll @@ -10,8 +10,7 @@ declare void @llvm.va_end(i8*) nounwind ; CHECK-LABEL: test_byval_8_bytes_alignment: define void @test_byval_8_bytes_alignment(i32 %i, ...) 
{ entry: -; CHECK: sub sp, sp, #12 -; CHECK: sub sp, sp, #4 +; CHECK: sub sp, sp, #16 ; CHECK: add r0, sp, #4 ; CHECK: stmib sp, {r1, r2, r3} %g = alloca i8* diff --git a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll index f74d63a..03a85d8 100644 --- a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll @@ -6,7 +6,7 @@ target triple = "armv7-eabi" declare void @bar1() define void @foo()#0 { -; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpsid $itstate $pc $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 
$d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30 +; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 
$d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30 call void @bar1() call void @bar2() ret void diff --git a/llvm/test/CodeGen/ARM/machine-outliner-calls.mir b/llvm/test/CodeGen/ARM/machine-outliner-calls.mir index f18eeb8..a92c9dd 100644 --- a/llvm/test/CodeGen/ARM/machine-outliner-calls.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-calls.mir @@ -311,7 +311,7 @@ body: | ; CHECK-LABEL: name: OUTLINED_FUNCTION_0 ; CHECK: bb.0: ; CHECK: liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -8 ; CHECK: BL @bar, implicit-def dead $lr, implicit $sp @@ -320,13 +320,13 @@ body: | ; CHECK: $r2 = MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r3 = MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r4 = MOVi 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = 
frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: MOVPCLR 14 /* CC::al */, $noreg ; CHECK-LABEL: name: OUTLINED_FUNCTION_1 ; CHECK: bb.0: ; CHECK: liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -8 ; CHECK: BL @bar, implicit-def dead $lr, implicit $sp @@ -335,7 +335,7 @@ body: | ; CHECK: $r2 = MOVi 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r3 = MOVi 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r4 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: TAILJMPd @bar, implicit $sp ; CHECK-LABEL: name: OUTLINED_FUNCTION_2 @@ -351,27 +351,27 @@ body: | ; CHECK-LABEL: name: OUTLINED_FUNCTION_3 ; CHECK: bb.0: ; CHECK: liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -8 ; CHECK: tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp ; CHECK: $r0 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r1 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r2 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: tTAILJMPdND @bar, 14 /* CC::al */, $noreg, implicit $sp ; 
CHECK-LABEL: name: OUTLINED_FUNCTION_4 ; CHECK: bb.0: ; CHECK: liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -8 ; CHECK: tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp ; CHECK: $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r1 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r2 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/machine-outliner-default.mir b/llvm/test/CodeGen/ARM/machine-outliner-default.mir index fa5119c..6d0218d 100644 --- a/llvm/test/CodeGen/ARM/machine-outliner-default.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-default.mir @@ -18,19 +18,19 @@ body: | ; CHECK-LABEL: name: outline_default_arm ; CHECK: bb.0: ; CHECK: liveins: $lr - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: BL @OUTLINED_FUNCTION_0 - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: bb.1: ; CHECK: liveins: $lr, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: BL @OUTLINED_FUNCTION_0 - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, 
$noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: bb.2: ; CHECK: liveins: $lr, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: BL @OUTLINED_FUNCTION_0 - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: bb.3: ; CHECK: liveins: $lr, $r6, $r7, $r8, $r9, $r10, $r11 ; CHECK: $r2 = MOVr $lr, 14 /* CC::al */, $noreg, $noreg @@ -72,19 +72,19 @@ body: | ; CHECK-LABEL: name: outline_default_thumb ; CHECK: bb.0: ; CHECK: liveins: $lr - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_1 - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: bb.1: ; CHECK: liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_1 - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: bb.2: ; CHECK: liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_1 - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 
14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: bb.3: ; CHECK: liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11 ; CHECK: $r2 = tMOVr $lr, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir b/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir index 5c16a28..b23c9e5 100644 --- a/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir @@ -28,9 +28,9 @@ body: | ; CHECK: $lr = MOVr killed $r6, 14 /* CC::al */, $noreg, $noreg ; CHECK: bb.2: ; CHECK: liveins: $lr - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: BL @OUTLINED_FUNCTION_1 - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: bb.3: ; CHECK: liveins: $lr, $r0, $r6, $r7, $r8, $r9, $r10, $r11 ; CHECK: $r6 = MOVr killed $lr, 14 /* CC::al */, $noreg, $noreg @@ -95,9 +95,9 @@ body: | ; CHECK: $lr = tMOVr killed $r6, 14 /* CC::al */, $noreg ; CHECK: bb.2: ; CHECK: liveins: $lr - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0 - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: bb.3: ; CHECK: liveins: $lr, $r0, $r6, $r7 ; CHECK: $r6 = tMOVr killed $lr, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir index 1f8745c..ae5caa5 100644 --- a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir 
+++ b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir @@ -146,41 +146,41 @@ body: | BX_RET 14, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I5]] - ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $d0 = VLDRD $sp, 2, 14 /* CC::al */, $noreg ;CHECK-NEXT: $d1 = VLDRD $sp, 10, 14 /* CC::al */, $noreg ;CHECK-NEXT: $d4 = VLDRD $sp, 255, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I5FP16]] - ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $s1 = VLDRH $sp, 4, 14, $noreg ;CHECK-NEXT: $s2 = VLDRH $sp, 12, 14, $noreg ;CHECK-NEXT: $s5 = VLDRH $sp, 244, 14, $noreg - ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I12]] - ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $r1 = LDRi12 $sp, 8, 14 
/* CC::al */, $noreg ;CHECK-NEXT: $r2 = LDRi12 $sp, 16, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r5 = LDRi12 $sp, 4094, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I3]] - ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $r1 = LDRSH $sp, $noreg, 8, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r2 = LDRSH $sp, $noreg, 16, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r5 = LDRSH $sp, $noreg, 255, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir index d03ab35..5618444 100644 --- a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir @@ -181,51 +181,51 @@ body: | BX_RET 14, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[LDREX]] - ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $r1 = t2LDREX $sp, 2, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r1 = t2LDREX $sp, 10, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r1 = t2LDREX 
$sp, 255, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I8]] - ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: t2STRHT $r0, $sp, 8, 14 /* CC::al */, $noreg ;CHECK-NEXT: t2STRHT $r0, $sp, 12, 14 /* CC::al */, $noreg ;CHECK-NEXT: t2STRHT $r0, $sp, 255, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I8S4]] - ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 8, 14 /* CC::al */, $noreg ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 16, 14 /* CC::al */, $noreg ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 1020, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I12]] - ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: 
frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 12, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 4094, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[T1_S]] - ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: tSTRspi $r0, $sp, 2, 14 /* CC::al */, $noreg ;CHECK-NEXT: tSTRspi $r0, $sp, 6, 14 /* CC::al */, $noreg ;CHECK-NEXT: tSTRspi $r0, $sp, 255, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/va_arg.ll b/llvm/test/CodeGen/ARM/va_arg.ll index f942411..b967a7b 100644 --- a/llvm/test/CodeGen/ARM/va_arg.ll +++ b/llvm/test/CodeGen/ARM/va_arg.ll @@ -5,10 +5,8 @@ define i64 @test1(i32 %i, ...) nounwind optsize { ; CHECK-LABEL: test1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #12 -; CHECK-NEXT: sub sp, sp, #12 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: add r0, sp, #4 ; CHECK-NEXT: stmib sp, {r1, r2, r3} ; CHECK-NEXT: add r0, r0, #7 @@ -19,8 +17,7 @@ define i64 @test1(i32 %i, ...) 
nounwind optsize { ; CHECK-NEXT: add r2, r1, #8 ; CHECK-NEXT: str r2, [sp] ; CHECK-NEXT: ldr r1, [r1, #4] -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: add sp, sp, #12 +; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: bx lr entry: %g = alloca i8*, align 4 @@ -34,10 +31,8 @@ entry: define double @test2(i32 %a, i32* %b, ...) nounwind optsize { ; CHECK-LABEL: test2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, sp, #8 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, sp, #12 ; CHECK-NEXT: add r0, sp, #4 ; CHECK-NEXT: stmib sp, {r2, r3} ; CHECK-NEXT: add r0, r0, #11 @@ -47,8 +42,7 @@ define double @test2(i32 %a, i32* %b, ...) nounwind optsize { ; CHECK-NEXT: str r1, [sp] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vmov r0, r1, d16 -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: add sp, sp, #12 ; CHECK-NEXT: bx lr entry: %ap = alloca i8*, align 4 ; [#uses=3] diff --git a/llvm/test/CodeGen/ARM/vargs_align.ll b/llvm/test/CodeGen/ARM/vargs_align.ll index e01ceeb..b867563 100644 --- a/llvm/test/CodeGen/ARM/vargs_align.ll +++ b/llvm/test/CodeGen/ARM/vargs_align.ll @@ -22,8 +22,7 @@ return: ; preds = %entry ; EABI: add sp, sp, #4 ; EABI: add sp, sp, #12 -; OABI: add sp, sp, #12 -; OABI: add sp, sp, #12 +; OABI: add sp, sp, #24 } declare void @llvm.va_start(i8*) nounwind diff --git a/llvm/test/CodeGen/Thumb2/bti-pac-replace-1.mir b/llvm/test/CodeGen/Thumb2/bti-pac-replace-1.mir new file mode 100644 index 0000000..2776799 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bti-pac-replace-1.mir @@ -0,0 +1,96 @@ +# RUN: llc --run-pass=arm-branch-targets %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-unknown-eabi" + + define hidden i32 @_Z1fi(i32 %x) { + entry: + %add = add nsw i32 %x, 1 + %call = tail call i32 @_Z1gi(i32 %add) + %sub = add nsw i32 %call, -1 + ret i32 %sub + } + + declare 
dso_local i32 @_Z1gi(i32) + + !llvm.module.flags = !{!0, !1, !2} + + !0 = !{i32 1, !"branch-target-enforcement", i32 1} + !1 = !{i32 1, !"sign-return-address", i32 1} + !2 = !{i32 1, !"sign-return-address-all", i32 0} + +... +--- +name: _Z1fi +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r12', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $r0, $r7, $lr, $r12 + + frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup 
CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 12 + frame-setup CFI_INSTRUCTION offset $ra_auth_code, -12 + $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @_Z1gi, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 + renamable $r0, dead $cpsr = nsw tSUBi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg + $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr + t2AUT implicit $r12, implicit $lr, implicit $sp + tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 + +... 
+# Check PAC is replaced with PACBTI and it has the frame-setup flag +# CHECK-LABEL: bb.0.entry: +# CHECK: frame-setup t2PACBTI \ No newline at end of file diff --git a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll new file mode 100644 index 0000000..630ae85 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll @@ -0,0 +1,43 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf -u - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-unknown-eabi" + +define hidden i32 @_Z1fi(i32 %x) { +entry: + %add = add nsw i32 %x, 1 + %call = tail call i32 @_Z1gi(i32 %add) + %sub = add nsw i32 %call, -1 + ret i32 %sub +} + +declare dso_local i32 @_Z1gi(i32) + +!llvm.module.flags = !{!0, !1, !2} +!0 = !{i32 1, !"branch-target-enforcement", i32 1} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + +; Check the function starts with `pacbti` and correct unwind info is emitted +; CHECK-LABEL: _Z1fi: +; ... +; CHECK: pacbti r12, lr, sp +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; ... 
+ +; UNWIND-LABEL: Opcodes [ +; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} +; UNWIND-NEXT: 0xB0 ; finish diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll new file mode 100644 index 0000000..d4be44e --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll @@ -0,0 +1,124 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8m.main-none-none-eabi" + +; int g(int); +; +; #if __ARM_FEATURE_CMSE == 3 +; #define ENTRY __attribute__((cmse_nonsecure_entry)) +; #else +; #define ENTRY +; #endif +; +; ENTRY int f(int x) { +; return 1 + g(x - 1); +; } + +define hidden i32 @f0(i32 %x) local_unnamed_addr { +entry: + %sub = add nsw i32 %x, -1 + %call = tail call i32 @g(i32 %sub) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +; CHECK-LABEL: f0: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r7, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + +define hidden i32 @f1(i32 %x) local_unnamed_addr #0 { +entry: + %sub = add nsw i32 %x, -1 + %call = tail call i32 @g(i32 %sub) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +; CHECK-LABEL: f1: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: vstr fpcxtns, [sp, #-4]! 
+; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK: vldr fpcxtns, [sp], #4 +; CHECK: aut r12, lr, sp + +define hidden i32 @f2(i32 %x) local_unnamed_addr #1 { +entry: + %sub = add nsw i32 %x, -1 + %call = tail call i32 @g(i32 %sub) + %add = add nsw i32 %call, 1 + ret i32 %add +} +; CHECK-LABEL: f2: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r7, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: mrs r12, control +; ... +; CHECK: bxns lr + +declare dso_local i32 @g(i32) local_unnamed_addr + +attributes #0 = { "cmse_nonsecure_entry" "target-features"="+8msecext,+armv8.1-m.main"} +attributes #1 = { "cmse_nonsecure_entry" "target-features"="+8msecext,+armv8-m.main,+fp-armv8d16"} + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + +; UNWIND-LABEL: FunctionAddress: 0x0 +; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} +; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND-LABEL: FunctionAddress: 0x24 +; UNWIND: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} + +; UNWIND-LABEL: FunctionAddress: 0x54 +; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} +; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND-LABEL: 00000001 {{.*}} f0 +; UNWIND-LABEL: 
00000025 {{.*}} f1 +; UNWIND-LABEL: 00000055 {{.*}} f2 diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll new file mode 100644 index 0000000..1feb74c --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll @@ -0,0 +1,37 @@ +; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK1 +; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK2 +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-unknown-eabi" + +@p = hidden local_unnamed_addr global i32 (i32, i32, i32, i32)* null, align 4 + +define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %call = tail call i32 @g(i32 %a) #0 + %0 = load i32 (i32, i32, i32, i32)*, i32 (i32, i32, i32, i32)** @p, align 4 + %call1 = tail call i32 %0(i32 %call, i32 %b, i32 %c, i32 %d) #0 + ret i32 %call1 +} + +; CHECK1-LABEL: f +; ... +; CHECK1: aut r12, lr, sp +; CHECK1-NOT: bx r12 + +; CHECK2-LABEL: f +; ... 
+; CHECK2: blx r4 +; CHECK2-NEXT: ldr r12, [sp], #4 +; CHECK2-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK2-NEXT: aut r12, lr, sp +; CHECK2-NEXT: bx lr + +declare dso_local i32 @g(i32) local_unnamed_addr #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-1.ll new file mode 100644 index 0000000..af761a4 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-1.ll @@ -0,0 +1,144 @@ +; RUN: llc --force-dwarf-frame-section --exception-model=arm %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s --exception-model=arm -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +; Triple tweaked so we get 16-byte stack alignment and better test coverage. 
+target triple = "armv7m-none-nacl-android" + +; -Oz +; volatile int a, b, c, d, e, f, g, h, i; +; +; int x() { +; int r = (a + b) / (c + d) + e + f / g + h + i; +; return r + 1; +; } +; +; int y() { +; int r = (a + b) / (c + d) + e + f / g + h + i; +; return r + 2; +; } + +@a = hidden global i32 0, align 4 +@b = hidden global i32 0, align 4 +@c = hidden global i32 0, align 4 +@d = hidden global i32 0, align 4 +@e = hidden global i32 0, align 4 +@f = hidden global i32 0, align 4 +@g = hidden global i32 0, align 4 +@h = hidden global i32 0, align 4 +@i = hidden global i32 0, align 4 + + +define hidden i32 @x() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %6 = load volatile i32, i32* @g, align 4 + %div3 = sdiv i32 %5, %6 + %7 = load volatile i32, i32* @h, align 4 + %8 = load volatile i32, i32* @i, align 4 + %add2 = add i32 %div, 1 + %add4 = add i32 %add2, %4 + %add5 = add i32 %add4, %div3 + %add6 = add i32 %add5, %7 + %add7 = add i32 %add6, %8 + ret i32 %add7 +} + +; CHECK-LABEL: x: +; CHECK: ldr r0, .LCPI0_0 +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: .save {ra_auth_code, lr} +; CHECK-NEXT: strd r12, lr, [sp, #-16]! 
+; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -16 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: ldrd r12, lr, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore lr +; CHECK-NEXT: .cfi_undefined ra_auth_code +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: bx lr + +define hidden i32 @y() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %6 = load volatile i32, i32* @g, align 4 + %div3 = sdiv i32 %5, %6 + %7 = load volatile i32, i32* @h, align 4 + %8 = load volatile i32, i32* @i, align 4 + %add2 = add i32 %div, 2 + %add4 = add i32 %add2, %4 + %add5 = add i32 %add4, %div3 + %add6 = add i32 %add5, %7 + %add7 = add i32 %add6, %8 + ret i32 %add7 +} +; CHECK-LABEL: y: +; CHECK: ldr r0, .LCPI1_0 +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: .save {ra_auth_code, lr} +; CHECK-NEXT: strd r12, lr, [sp, #-16]! 
+; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -16 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: ldrd r12, lr, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore lr +; CHECK-NEXT: .cfi_undefined ra_auth_code +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK-NOT: pac +; CHECK-NOT: aut +; CHECK-NOT: r12 +; CHECK: bx lr + +attributes #0 = { minsize nofree norecurse nounwind optsize uwtable} + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + +; UNWIND-LABEL: FunctionAddress: 0x0 +; UNWIND: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} +; UNWIND-NEXT: 0x01 ; vsp = vsp + 8 +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND-LABEL: FunctionAddress: 0x20 +; UNWIND: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} +; UNWIND-NEXT: 0x01 ; vsp = vsp + 8 +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND-LABEL: FunctionAddress: 0x40 +; UNWIND: Model: CantUnwind + +; UNWIND-LABEL: 00000041 {{.*}} OUTLINED_FUNCTION_0 +; UNWIND-LABEL: 00000001 {{.*}} x +; UNWIND-LABEL: 00000021 {{.*}} y diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-2.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-2.ll new file mode 100644 index 0000000..a346bb4 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-2.ll @@ -0,0 +1,89 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; -Oz +; volatile int a, b, c, d, e, f; +; +; int x() { +; int r = a + b + c + d + e + f; +; return r + 1; +; } +; +; int y() { +; int r = a + b + c + d + e + f; +; return r + 2; +; } + +@a = hidden global i32 0, align 4 +@b = hidden global i32 0, align 4 +@c = 
hidden global i32 0, align 4 +@d = hidden global i32 0, align 4 +@e = hidden global i32 0, align 4 +@f = hidden global i32 0, align 4 + +define hidden i32 @x() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %add = add i32 %0, 1 + %add1 = add i32 %add, %1 + %add2 = add i32 %add1, %2 + %add3 = add i32 %add2, %3 + %add4 = add i32 %add3, %4 + %add5 = add i32 %add4, %5 + ret i32 %add5 +} +; CHECK-LABEL: x: +; CHECK: ldr r{{.*}}, .LCPI0_0 +; CHECK-NEXT: mov r[[A:[0-9]*]], lr +; CHECK-NEXT: .cfi_register lr, r[[A]] +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: mov lr, r[[A]] +; CHECK-NEXT: .cfi_restore lr +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: bx lr + +define hidden i32 @y() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %add = add i32 %0, 2 + %add1 = add i32 %add, %1 + %add2 = add i32 %add1, %2 + %add3 = add i32 %add2, %3 + %add4 = add i32 %add3, %4 + %add5 = add i32 %add4, %5 + ret i32 %add5 +} +; CHECK-LABEL: y: +; CHECK: ldr r{{.*}}, .LCPI1_0 +; CHECK-NEXT: mov r[[B:[0-9]*]], lr +; CHECK-NEXT: .cfi_register lr, r[[B]] +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: mov lr, r[[B]] +; CHECK-NEXT: .cfi_restore lr +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK-NOT: pac +; CHECK-NOT: aut +; CHECK-NOT: r12 +; CHECK: bx lr + +attributes #0 = { minsize nofree norecurse nounwind optsize} + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = 
!{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll new file mode 100644 index 0000000..24ca3f5 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll @@ -0,0 +1,166 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; -Oz +; __attribute__((noinline)) int h(int a, int b) { return a + b; } +; +; int f(int a, int b, int c, int d) { +; if (a < 0) +; return -1; +; a = h(11 * a - b, b); +; return 2 + a * (a + b) / (c + d); +; } +; +; int g(int a, int b, int c, int d) { +; if (a < 0) +; return -1; +; a = h(11 * a - b, b); +; return 1 + a * (a + b) / (c + d); +; } + +define hidden i32 @h(i32 %a, i32 %b) local_unnamed_addr #0 { +entry: + %add = add nsw i32 %b, %a + ret i32 %add +} + +define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %mul = mul nsw i32 %a, 11 + %sub = sub nsw i32 %mul, %b + %call = tail call i32 @h(i32 %sub, i32 %b) + %add = add nsw i32 %call, %b + %mul1 = mul nsw i32 %add, %call + %add2 = add nsw i32 %d, %c + %div = sdiv i32 %mul1, %add2 + %add3 = add nsw i32 %div, 2 + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ %add3, %if.end ], [ -1, %entry ] + ret i32 %retval.0 +} + +; CHECK-LABEL: f: +; CHECK: bmi .LBB +; ... 
+; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r6, -8 +; CHECK-NEXT: .cfi_offset r5, -12 +; CHECK-NEXT: .cfi_offset r4, -16 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +; ... +; CHECK: .LBB +; CHECK: bx lr + + +define hidden i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %mul = mul nsw i32 %a, 11 + %sub = sub nsw i32 %mul, %b + %call = tail call i32 @h(i32 %sub, i32 %b) + %add = add nsw i32 %call, %b + %mul1 = mul nsw i32 %add, %call + %add2 = add nsw i32 %d, %c + %div = sdiv i32 %mul1, %add2 + %add3 = add nsw i32 %div, 1 + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ %add3, %if.end ], [ -1, %entry ] + ret i32 %retval.0 +} +; CHECK-LABEL: g: +; CHECK: bmi .LBB +; ... +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r6, -8 +; CHECK-NEXT: .cfi_offset r5, -12 +; CHECK-NEXT: .cfi_offset r4, -16 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; ... 
+; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +; ... +; CHECK: .LBB +; CHECK: bx lr + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {ra_auth_code, lr} +; CHECK-NEXT: strd r12, lr, [sp, #-8]! +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; ... +; CHECK: ldrd r12, lr, [sp], #8 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore lr +; CHECK-NEXT: .cfi_undefined ra_auth_code +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + +attributes #0 = { minsize noinline norecurse nounwind optsize readnone uwtable } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + + +; UNWIND-LABEL: FunctionAddress: 0x4 +; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} + +; UNWIND-LABEL: FunctionAddress: 0x30 +; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} + +; UNWIND-LABEL: FunctionAddress: 0x5C +; UNWIND: Model: CantUnwind + +; UNWIND-LABEL: 0000005d {{.*}} OUTLINED_FUNCTION_0 +; UNWIND-LABEL: 00000005 {{.*}} f +; UNWIND-LABEL: 00000031 {{.*}} g diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll new file mode 100644 index 0000000..b325c01a --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll @@ -0,0 +1,219 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; C++, -Oz +; __attribute__((noinline)) int 
h(int a, int b) { +; if (a < 0) +; throw 1; +; return a + b; +; } +; +; int f(int a, int b, int c, int d) { +; if (a < 0) +; return -1; +; a = h(a, b); +; return 2 + a * (a + b) / (c + d); +; } +; +; int g(int a, int b, int c, int d) { +; if (a < 0) +; return -1; +; a = h(a, b); +; return 1 + a * (a + b) / (c + d); +; } + +@_ZTIi = external dso_local constant i8* + +define hidden i32 @_Z1hii(i32 %a, i32 %b) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %exception = tail call i8* @__cxa_allocate_exception(i32 4) #1 + %0 = bitcast i8* %exception to i32* + store i32 1, i32* %0, align 8 + tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #2 + unreachable + +if.end: ; preds = %entry + %add = add nsw i32 %b, %a + ret i32 %add +} + +; CHECK-LABEL: _Z1hii: +; ... +; CHECK: bxgt lr +; ... +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; ... 
+; CHECK-NOT: pac +; CHECK-NOT: aut +; CHECK: .cfi_endproc + +declare dso_local i8* @__cxa_allocate_exception(i32) local_unnamed_addr + +declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr + +define hidden i32 @_Z1fiiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %call = tail call i32 @_Z1hii(i32 %a, i32 %b) + %add = add nsw i32 %call, %b + %mul = mul nsw i32 %add, %call + %add1 = add nsw i32 %d, %c + %div = sdiv i32 %mul, %add1 + %add2 = add nsw i32 %div, 2 + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ %add2, %if.end ], [ -1, %entry ] + ret i32 %retval.0 +} + +; CHECK-LABEL: _Z1fiiii: +; ... +; CHECK: bmi .L[[B:[a-zA-Z0-9]*]] +; ... +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r6, -8 +; CHECK-NEXT: .cfi_offset r5, -12 +; CHECK-NEXT: .cfi_offset r4, -16 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +; ... +; CHECK: .L[[B]] +; ... 
+; CHECK: bx lr + + + +define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %call = tail call i32 @_Z1hii(i32 %a, i32 %b) + %add = add nsw i32 %call, %b + %mul = mul nsw i32 %add, %call + %add1 = add nsw i32 %d, %c + %div = sdiv i32 %mul, %add1 + %add2 = add nsw i32 %div, 1 + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ %add2, %if.end ], [ -1, %entry ] + ret i32 %retval.0 +} + +; CHECK-LABEL: _Z1giiii: +; ... +; CHECK: bmi .L[[B:[a-zA-Z0-9]*]] +; ... +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r6, -8 +; CHECK-NEXT: .cfi_offset r5, -12 +; CHECK-NEXT: .cfi_offset r4, -16 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +; ... +; CHECK: .L[[B]] +; ... 
+; CHECK: bx lr + + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK-NOT: pac +; CHECK-NOT: aut +; CHECK: b _Z1hii + +attributes #0 = { minsize noinline optsize "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m3" "target-features"="+armv7-m,+hwdiv,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } +attributes #2 = { noreturn } + + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + + +; UNWIND-LABEL: FunctionAddress: 0x0 +; UNWIND: Opcodes +; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} +; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND-LABEL: FunctionAddress: 0x2C +; UNWIND: Opcodes +; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} + +; UNWIND-LABEL: FunctionAddress: 0x62 +; UNWIND: Opcodes +; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} + +; UNWIND-LABEL: FunctionAddress: 0x98 +; UNWIND: Opcodes +; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND: 00000099 {{.*}} OUTLINED_FUNCTION_0 +; UNWIND: 0000002d {{.*}} _Z1fiiii +; UNWIND: 00000063 {{.*}} _Z1giiii +; UNWIND: 00000001 {{.*}} _Z1hii diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll new file mode 100644 index 0000000..b7a475a --- /dev/null +++ 
b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll @@ -0,0 +1,98 @@ +; RUN: llc %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; CHECK-LABEL: x: +; CHECK: bl OUTLINED_FUNCTION +; CHECK-LABEL: y: +; CHECK: bl OUTLINED_FUNCTION +; CHECK-LABEL: z: +; CHECK-NOT: bl OUTLINED_FUNCTION + +@a = hidden global i32 0, align 4 +@b = hidden global i32 0, align 4 +@c = hidden global i32 0, align 4 +@d = hidden global i32 0, align 4 +@e = hidden global i32 0, align 4 +@f = hidden global i32 0, align 4 +@g = hidden global i32 0, align 4 +@h = hidden global i32 0, align 4 +@i = hidden global i32 0, align 4 + +define hidden i32 @x() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %6 = load volatile i32, i32* @g, align 4 + %div3 = sdiv i32 %5, %6 + %7 = load volatile i32, i32* @h, align 4 + %8 = load volatile i32, i32* @i, align 4 + %add2 = add i32 %div, 1 + %add4 = add i32 %add2, %4 + %add5 = add i32 %add4, %div3 + %add6 = add i32 %add5, %7 + %add7 = add i32 %add6, %8 + ret i32 %add7 +} + +define hidden i32 @y() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %6 = load volatile i32, i32* @g, align 4 + %div3 = sdiv i32 %5, %6 + %7 = load volatile i32, i32* @h, align 4 + %8 = load volatile i32, i32* @i, align 4 + %add2 = add i32 %div, 2 + 
%add4 = add i32 %add2, %4 + %add5 = add i32 %add4, %div3 + %add6 = add i32 %add5, %7 + %add7 = add i32 %add6, %8 + ret i32 %add7 +} + +define hidden i32 @z() local_unnamed_addr #1 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %6 = load volatile i32, i32* @g, align 4 + %div3 = sdiv i32 %5, %6 + %7 = load volatile i32, i32* @h, align 4 + %8 = load volatile i32, i32* @i, align 4 + %add2 = add i32 %div, 3 + %add4 = add i32 %add2, %4 + %add5 = add i32 %add4, %div3 + %add6 = add i32 %add5, %7 + %add7 = add i32 %add6, %8 + ret i32 %add7 +} + +attributes #0 = { minsize nofree norecurse nounwind optsize } +attributes #1 = { minsize nofree norecurse nounwind optsize "sign-return-address"="none" } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll new file mode 100644 index 0000000..111f2de --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll @@ -0,0 +1,71 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-none-eabi" + +; int g(int, int *); +; +; int f() { +; __attribute__((aligned(32))) int a[4]; +; g(4, a); +; int s = 0; +; for (int i = 0; i < 4; ++i) +; s += a[i]; +; return s; +; } + +define hidden i32 @_Z1fv() local_unnamed_addr { +entry: + %a = alloca [4 x i32], align 32 + %0 = bitcast [4 
x i32]* %a to i8* + %arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 0 + %call = call i32 @_Z1giPi(i32 4, i32* nonnull %arraydecay) + %1 = load i32, i32* %arraydecay, align 32 + %arrayidx.1 = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 1 + %2 = load i32, i32* %arrayidx.1, align 4 + %add.1 = add nsw i32 %2, %1 + %arrayidx.2 = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 2 + %3 = load i32, i32* %arrayidx.2, align 8 + %add.2 = add nsw i32 %3, %add.1 + %arrayidx.3 = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 3 + %4 = load i32, i32* %arrayidx.3, align 4 + %add.3 = add nsw i32 %4, %add.2 + ret i32 %add.3 +} + +; CHECK-LABEL: _Z1fv: +; CHECK: pac r12, lr, sp +; CHECK: .save {r4, r6, r7, lr} +; CHECK-NEXT: push {r4, r6, r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .cfi_offset r6, -12 +; CHECK-NEXT: .cfi_offset r4, -16 +; CHECK-NEXT: .setfp r7, sp, #8 +; CHECK-NEXT: add r7, sp, #8 +; CHECK-NEXT: .cfi_def_cfa r7, 8 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! 
+; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .pad #44 +; CHECK-NEXT: sub sp, #44 +; CHECK: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r6, r7, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + + +declare dso_local i32 @_Z1giPi(i32, i32*) local_unnamed_addr + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + +; UNWIND-LABEL: FunctionAddress: 0x0 +; UNWIND: 0x97 ; vsp = r7 +; UNWIND: 0x42 ; vsp = vsp - 12 +; UNWIND: 0xB4 ; pop ra_auth_code +; UNWIND: 0x84 0x0D ; pop {r4, r6, r7, lr} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-unsupported-arch.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-unsupported-arch.ll new file mode 100644 index 0000000..0af18a9 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-unsupported-arch.ll @@ -0,0 +1,31 @@ +; RUN: llc -mtriple thumbv6m-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple thumbv8m.base-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple thumbv7a-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple thumbv7m-eabi %s -o - | FileCheck %s --check-prefix=CHECK-PACBTI + +; Check we don't emit PACBTI-M instructions for architectures +; that do not support them. 
+define hidden i32 @f(i32 %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %0 = load i32, i32* %x.addr, align 4 + %sub = sub nsw i32 1, %0 + %call = call i32 @g(i32 %sub) + %add = add nsw i32 1, %call + ret i32 %add +} +; CHECK-LABEL: f: +; CHECK-NOT: bti + +; CHECK-PACBTI-LABEL: f: +; CHECK-PACBTI: pacbti +declare dso_local i32 @g(i32) + +attributes #0 = { noinline nounwind } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 1} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll new file mode 100644 index 0000000..89a1e13 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll @@ -0,0 +1,77 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-none-eabi" + +%"struct.std::__va_list" = type { i8* } + +define hidden i32 @_Z1fiz(i32 %n, ...) 
local_unnamed_addr #0 { +entry: + %ap = alloca %"struct.std::__va_list", align 4 + %0 = bitcast %"struct.std::__va_list"* %ap to i8* + call void @llvm.va_start(i8* nonnull %0) + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %1 = getelementptr inbounds %"struct.std::__va_list", %"struct.std::__va_list"* %ap, i32 0, i32 0 + %argp.cur.pre = load i8*, i8** %1, align 4 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + call void @llvm.va_end(i8* nonnull %0) + ret i32 %s.0.lcssa + +for.body: ; preds = %for.body.lr.ph, %for.body + %argp.cur = phi i8* [ %argp.cur.pre, %for.body.lr.ph ], [ %argp.next, %for.body ] + %i.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %s.08 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4 + store i8* %argp.next, i8** %1, align 4 + %2 = bitcast i8* %argp.cur to i32* + %3 = load i32, i32* %2, align 4 + %add = add nsw i32 %3, %s.08 + %inc = add nuw nsw i32 %i.09, 1 + %exitcond.not = icmp eq i32 %inc, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: _Z1fiz: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset lr, -16 +; CHECK-NEXT: .cfi_offset r7, -20 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset ra_auth_code, -24 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 28 +; ... +; CHECK: add.w r[[N:[0-9]*]], sp, #16 +; CHECK: stm.w r[[N]], {r1, r2, r3} +; ... 
+; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r7, lr} +; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + +declare void @llvm.va_start(i8*) #1 +declare void @llvm.va_end(i8*) #1 + +attributes #0 = { nounwind optsize} +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll new file mode 100644 index 0000000..ef0912e --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll @@ -0,0 +1,101 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-none-eabi" + +; C++ +; int g(int); +; +; int f(int n, ...) { +; __builtin_va_list ap; +; __builtin_va_start(ap, n); +; int s = 0; +; for (int i = 0; i < n; ++i) +; s += g(__builtin_va_arg(ap, int)); +; __builtin_va_end(ap); +; return s; +; } + +%"struct.std::__va_list" = type { i8* } + +define hidden i32 @_Z1fiz(i32 %n, ...) 
local_unnamed_addr #0 { +entry: + %ap = alloca %"struct.std::__va_list", align 4 + %0 = bitcast %"struct.std::__va_list"* %ap to i8* + call void @llvm.va_start(i8* nonnull %0) + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %1 = getelementptr inbounds %"struct.std::__va_list", %"struct.std::__va_list"* %ap, i32 0, i32 0 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + call void @llvm.va_end(i8* nonnull %0) + ret i32 %s.0.lcssa + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %s.08 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %argp.cur = load i8*, i8** %1, align 4 + %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4 + store i8* %argp.next, i8** %1, align 4 + %2 = bitcast i8* %argp.cur to i32* + %3 = load i32, i32* %2, align 4 + %call = call i32 @_Z1gi(i32 %3) + %add = add nsw i32 %call, %s.08 + %inc = add nuw nsw i32 %i.09, 1 + %exitcond.not = icmp eq i32 %inc, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: _Z1fiz: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 28 +; CHECK-NEXT: .cfi_offset lr, -16 +; CHECK-NEXT: .cfi_offset r7, -20 +; CHECK-NEXT: .cfi_offset r5, -24 +; CHECK-NEXT: .cfi_offset r4, -28 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset ra_auth_code, -32 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; ... +; CHECK: add r[[N:[0-9]*]], sp, #28 +; CHECK: stm r[[N]]!, {r1, r2, r3} +; ... 
+; CHECK: add sp, #8 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r5, r7, lr} +; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + +declare void @llvm.va_start(i8*) #1 +declare void @llvm.va_end(i8*) #1 + +declare dso_local i32 @_Z1gi(i32) local_unnamed_addr + +attributes #0 = { optsize } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + +; UNWIND-LABEL: FunctionAddress +; UNWIND: 0x01 ; vsp = vsp + 8 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x0B ; pop {r4, r5, r7, lr} +; UNWIND-NEXT: 0x02 ; vsp = vsp + 12 diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll new file mode 100644 index 0000000..b1881ee --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll @@ -0,0 +1,125 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-none-eabi" + +; int g(int, int *); +; +; int f(int n) { +; int a[n]; +; g(n, a); +; int s = 0; +; for (int i = 0; i < n; ++i) +; s += a[i]; +; return s; +; } + +define hidden i32 @f(i32 %n) local_unnamed_addr #0 { +entry: + %vla = alloca i32, i32 %n, align 4 + %call = call i32 @g(i32 %n, i32* nonnull %vla) #0 + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %0 = add i32 %n, -1 + %xtraiter = and i32 %n, 3 + %1 = icmp ult i32 %0, 3 + br i1 %1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new + +for.body.preheader.new: ; preds = %for.body.preheader + %unroll_iter = and i32 %n, -4 + br label %for.body + +for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body.preheader + %add.lcssa.ph = phi i32 [ undef, %for.body.preheader ], [ 
%add.3, %for.body ] + %i.010.unr = phi i32 [ 0, %for.body.preheader ], [ %inc.3, %for.body ] + %s.09.unr = phi i32 [ 0, %for.body.preheader ], [ %add.3, %for.body ] + %lcmp.mod.not = icmp eq i32 %xtraiter, 0 + br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil + +for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa + %arrayidx.epil = getelementptr inbounds i32, i32* %vla, i32 %i.010.unr + %2 = load i32, i32* %arrayidx.epil, align 4 + %add.epil = add nsw i32 %2, %s.09.unr + %epil.iter.cmp.not = icmp eq i32 %xtraiter, 1 + br i1 %epil.iter.cmp.not, label %for.cond.cleanup, label %for.body.epil.1 + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.body.epil.2, %for.body.epil.1, %for.body.epil, %entry + %s.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa.ph, %for.cond.cleanup.loopexit.unr-lcssa ], [ %add.epil, %for.body.epil ], [ %add.epil.1, %for.body.epil.1 ], [ %add.epil.2, %for.body.epil.2 ] + ret i32 %s.0.lcssa + +for.body: ; preds = %for.body, %for.body.preheader.new + %i.010 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] + %s.09 = phi i32 [ 0, %for.body.preheader.new ], [ %add.3, %for.body ] + %niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %vla, i32 %i.010 + %3 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %3, %s.09 + %inc = or i32 %i.010, 1 + %arrayidx.1 = getelementptr inbounds i32, i32* %vla, i32 %inc + %4 = load i32, i32* %arrayidx.1, align 4 + %add.1 = add nsw i32 %4, %add + %inc.1 = or i32 %i.010, 2 + %arrayidx.2 = getelementptr inbounds i32, i32* %vla, i32 %inc.1 + %5 = load i32, i32* %arrayidx.2, align 4 + %add.2 = add nsw i32 %5, %add.1 + %inc.2 = or i32 %i.010, 3 + %arrayidx.3 = getelementptr inbounds i32, i32* %vla, i32 %inc.2 + %6 = load i32, i32* %arrayidx.3, align 4 + %add.3 = add nsw i32 %6, %add.2 + %inc.3 = add nuw nsw i32 %i.010, 4 + %niter.nsub.3 = add i32 %niter, -4 + %niter.ncmp.3 = 
icmp eq i32 %niter.nsub.3, 0 + br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body + +for.body.epil.1: ; preds = %for.body.epil + %inc.epil = add nuw nsw i32 %i.010.unr, 1 + %arrayidx.epil.1 = getelementptr inbounds i32, i32* %vla, i32 %inc.epil + %7 = load i32, i32* %arrayidx.epil.1, align 4 + %add.epil.1 = add nsw i32 %7, %add.epil + %epil.iter.cmp.1.not = icmp eq i32 %xtraiter, 2 + br i1 %epil.iter.cmp.1.not, label %for.cond.cleanup, label %for.body.epil.2 + +for.body.epil.2: ; preds = %for.body.epil.1 + %inc.epil.1 = add nuw nsw i32 %i.010.unr, 2 + %arrayidx.epil.2 = getelementptr inbounds i32, i32* %vla, i32 %inc.epil.1 + %8 = load i32, i32* %arrayidx.epil.2, align 4 + %add.epil.2 = add nsw i32 %8, %add.epil.1 + br label %for.cond.cleanup +} + +; CHECK-LABEL: f: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .cfi_offset r6, -12 +; CHECK-NEXT: .cfi_offset r5, -16 +; CHECK-NEXT: .cfi_offset r4, -20 +; CHECK-NEXT: .setfp r7, sp, #12 +; CHECK-NEXT: add r7, sp, #12 +; CHECK-NEXT: .cfi_def_cfa r7, 8 +; CHECK-NEXT: .save {r8, r9, ra_auth_code} +; CHECK-NEXT: push.w {r8, r9, r12} +; CHECK-NEXT: .cfi_offset ra_auth_code, -24 +; CHECK-NEXT: .cfi_offset r9, -28 +; CHECK-NEXT: .cfi_offset r8, -32 +; ... 
+; CHECK: sub.w r[[N:[0-9]*]], r7, #24 +; CHECK-NEXT: mov sp, r[[N]] +; CHECK-NEXT: pop.w {r8, r9, r12} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + +declare dso_local i32 @g(i32, i32*) local_unnamed_addr #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/MC/ARM/ra-auth-code-errors.s b/llvm/test/MC/ARM/ra-auth-code-errors.s new file mode 100644 index 0000000..0c33f34 --- /dev/null +++ b/llvm/test/MC/ARM/ra-auth-code-errors.s @@ -0,0 +1,36 @@ +// RUN: not llvm-mc -triple=thumbv7 %s -o - 2>&1 | FileCheck %s --strict-whitespace + .text + .syntax unified + .code 16 + .thumb_func + .global f +f: + .fnstart + .save {r11-ra_auth_code} +// CHECK: [[# @LINE - 1]]:16: error: pseudo-register not allowed +// CHECK-NEXT: .save {r11-ra_auth_code} +// CHECK-NEXT: ^ + .save {r11, ra_auth_code, r12} +// CHECK: [[# @LINE - 1]]:31: warning: duplicated register (r12) in register list +// CHECK-NEXT: .save {r11, ra_auth_code, r12} +// CHECK-NEXT: ^ + .save {ra_auth_code-r13} +// CHECK: [[# @LINE - 1]]:12: error: pseudo-register not allowed +// CHECK-NEXT: .save {ra_auth_code-r13} +// CHECK-NEXT: ^ + push {ra_auth_code} +// CHECK: [[# @LINE - 1]]:11: error: pseudo-register not allowed +// CHECK-NEXT: push {ra_auth_code} +// CHECK-NEXT: ^ + push {r11, ra_auth_code} +// CHECK: [[# @LINE - 1]]:16: error: pseudo-register not allowed +// CHECK-NEXT: push {r11, ra_auth_code} +// CHECK-NEXT: ^ + push {ra_auth_code, r12} +// CHECK: [[# @LINE - 1]]:11: error: pseudo-register not allowed +// CHECK-NEXT: push {ra_auth_code, r12} +// CHECK-NEXT: ^ + push {ra_auth_code, r13} +// CHECK: [[# @LINE - 1]]:11: error: pseudo-register not allowed +// CHECK-NEXT: push {ra_auth_code, r13} +// CHECK-NEXT: ^ diff --git a/llvm/test/MC/ARM/ra-auth-code.s 
b/llvm/test/MC/ARM/ra-auth-code.s new file mode 100644 index 0000000..218469b --- /dev/null +++ b/llvm/test/MC/ARM/ra-auth-code.s @@ -0,0 +1,24 @@ +// RUN: llvm-mc -triple=thumbv7 -filetype=obj %s -o - | llvm-readelf -u - | FileCheck %s + + .syntax unified + .code 16 + .thumb_func + .global f +f: + .fnstart + .save {ra_auth_code} + .save {ra_auth_code, r13} + .save {r11, ra_auth_code, r13} + .save {r11, ra_auth_code} + .fnend +// CHECK-LABEL: Opcodes [ +// CHECK-NEXT: 0x80 0x80 ; pop {fp} +// CHECK-NEXT: 0xB4 ; pop ra_auth_code +// CHECK-NEXT: 0x80 0x80 ; pop {fp} +// CHECK-NEXT: 0xB4 ; pop ra_auth_code +// CHECK-NEXT: 0x82 0x00 ; pop {sp} +// CHECK-NEXT: 0xB4 ; pop ra_auth_code +// CHECK-NEXT: 0x82 0x00 ; pop {sp} +// CHECK-NEXT: 0xB4 ; pop ra_auth_code +// CHECK-NEXT: 0xB0 ; finish +// CHECK-NEXT: 0xB0 ; finish diff --git a/llvm/test/MC/Disassembler/ARM/sub-sp-imm-thumb2.txt b/llvm/test/MC/Disassembler/ARM/sub-sp-imm-thumb2.txt index d8a7cf1..5c798f6 100644 --- a/llvm/test/MC/Disassembler/ARM/sub-sp-imm-thumb2.txt +++ b/llvm/test/MC/Disassembler/ARM/sub-sp-imm-thumb2.txt @@ -6,8 +6,8 @@ # CHECK: subw sp, sp, #1148 # CHECK-SAME: -# CHECK-NEXT: +# CHECK-NEXT: +# CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: > @@ -16,8 +16,8 @@ # CHECK: sub.w sp, sp, #1024 # CHECK-SAME: -# CHECK-NEXT: +# CHECK-NEXT: +# CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: @@ -27,8 +27,8 @@ # CHECK: subs.w sp, sp, #1024 # CHECK-SAME: -# CHECK-NEXT: +# CHECK-NEXT: +# CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: diff --git a/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s b/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s index ef17118..8bd7e01 100644 --- a/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s +++ b/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s @@ -102,6 +102,7 @@ raw: .unwind_raw 8, 0xa1 .unwind_raw 12, 0xa9 .unwind_raw 0, 0xb0 + .unwind_raw 4, 0xb4 .unwind_raw 4, 0xb1, 0x01 .unwind_raw 0xa04, 0xb2, 0x80, 0x04 .unwind_raw 24, 0xb3, 0x12 @@ -129,12 +130,10 @@ 
spare: .unwind_raw 0, 0xa0 .unwind_raw 0, 0xa8 .unwind_raw 0, 0xb0 - .unwind_raw 0, 0xb1, 0x00 .unwind_raw 4, 0xb1, 0x01 .unwind_raw 0, 0xb1, 0x10 .unwind_raw 0x204, 0xb2, 0x00 .unwind_raw 16, 0xb3, 0x00 - .unwind_raw 0, 0xb4 .unwind_raw 16, 0xb8 .unwind_raw 4, 0xc0 .unwind_raw 4, 0xc6, 0x00 @@ -277,6 +276,7 @@ spare: @ SYM: 0xB3 0x12 ; pop {d1, d2, d3} @ SYM: 0xB2 0x80 0x04 ; vsp = vsp + 2564 @ SYM: 0xB1 0x01 ; pop {r0} +@ SYM: 0xB4 ; pop ra_auth_code @ SYM: 0xB0 ; finish @ SYM: 0xA9 ; pop {r4, r5, lr} @ SYM: 0xA1 ; pop {r4, r5} @@ -306,12 +306,10 @@ spare: @ SYM: 0xC6 0x00 ; pop {wR0} @ SYM: 0xC0 ; pop {wR10} @ SYM: 0xB8 ; pop {d8} -@ SYM: 0xB4 ; spare @ SYM: 0xB3 0x00 ; pop {d0} @ SYM: 0xB2 0x00 ; vsp = vsp + 516 @ SYM: 0xB1 0x10 ; spare @ SYM: 0xB1 0x01 ; pop {r0} -@ SYM: 0xB1 0x00 ; spare @ SYM: 0xB0 ; finish @ SYM: 0xA8 ; pop {r4, lr} @ SYM: 0xA0 ; pop {r4} diff --git a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h index d97cea4..94f1672 100644 --- a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h +++ b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h @@ -158,9 +158,8 @@ inline void OpcodeDecoder::Decode_10110001_0000iiii(const uint8_t *Opcodes, uint8_t Opcode0 = Opcodes[OI++ ^ 3]; uint8_t Opcode1 = Opcodes[OI++ ^ 3]; - SW.startLine() - << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1, - ((Opcode1 & 0xf0) || Opcode1 == 0x00) ? "spare" : "pop "); + SW.startLine() << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1, + (Opcode1 & 0xf0) ? "spare" : "pop "); if (((Opcode1 & 0xf0) == 0x00) && Opcode1) PrintGPR((Opcode1 & 0x0f)); OS << '\n'; @@ -195,7 +194,8 @@ inline void OpcodeDecoder::Decode_10110011_sssscccc(const uint8_t *Opcodes, inline void OpcodeDecoder::Decode_101101nn(const uint8_t *Opcodes, unsigned &OI) { uint8_t Opcode = Opcodes[OI++ ^ 3]; - SW.startLine() << format("0x%02X ; spare\n", Opcode); + SW.startLine() << format("0x%02X ; %s\n", Opcode, + (Opcode == 0xb4) ? 
"pop ra_auth_code" : "spare"); } inline void OpcodeDecoder::Decode_10111nnn(const uint8_t *Opcodes, unsigned &OI) { -- 2.7.4