// instructions. May skip if the replacement is not profitable. May invalidate
// the input iterator and replace it with a valid one.
void emitCode(MachineBasicBlock::iterator &InsertI,
- const AArch64FrameLowering *TFI, bool IsLast);
+ const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
};
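// Annotation (not part of this patch): emitCode takes InsertI by reference
// because it may erase instructions around the insertion point and hand back
// a fresh, valid iterator. A hedged usage sketch, mirroring the call sites
// later in this change (the constructor arguments are assumptions):
//
//   TagStoreEdit TSE(MBB, /*ZeroData=*/FirstZeroData);
//   TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate=*/false);
//   TSE.clear(); // reuse the edit object for the next run of tag stores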
void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
}
void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
- const AArch64FrameLowering *TFI, bool IsLast) {
+ const AArch64FrameLowering *TFI,
+ bool TryMergeSPUpdate) {
if (TagStores.empty())
return;
TagStoreInstr &FirstTagStore = TagStores[0];
emitUnrolled(InsertI);
} else {
MachineInstr *UpdateInstr = nullptr;
- int64_t TotalOffset;
- if (IsLast) {
+ int64_t TotalOffset = 0;
+ if (TryMergeSPUpdate) {
// See if we can merge base register update into the STGloop.
// This is done in AArch64LoadStoreOptimizer for "normal" stores,
// but STGloop is way too unusual for that, and also it only
// realistically happens in function epilogue. Also, STGloop is expanded
// before that pass.
for (auto &Instr : Instrs) {
if (EndOffset && *EndOffset != Instr.Offset) {
// Found a gap.
- TSE.emitCode(InsertI, TFI, /*IsLast = */ false);
+ TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ false);
TSE.clear();
}
EndOffset = Instr.Offset + Instr.Size;
}
- TSE.emitCode(InsertI, TFI, /*IsLast = */ true);
+ // Multiple FP/SP updates in a loop cannot be described by CFI instructions.
+ TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */
+ !MBB->getParent()
+ ->getInfo<AArch64FunctionInfo>()
+ ->needsAsyncDwarfUnwindInfo());
return InsertI;
}
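// Annotation (not part of this patch): the TryMergeSPUpdate argument computed
// above is the heart of the fix. Merging the SP update produces loops such as
// "st2g sp, [sp], #32" that move SP on every iteration, and CFI cannot
// describe a CFA offset that changes per loop iteration. A minimal sketch of
// the gate, assuming the in-tree AArch64FunctionInfo interface of this change:
//
//   const auto *AFI = MBB->getParent()->getInfo<AArch64FunctionInfo>();
//   // Fold the SP update into the loop only when precise asynchronous
//   // unwind info is not required; otherwise SP must stay fixed until one
//   // final, CFI-described adjustment in the epilogue.
//   bool TryMergeSPUpdate = !AFI->needsAsyncDwarfUnwindInfo();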
// Create a scratch register for the frame index elimination in an instruction.
// This function has special handling of stack tagging loop pseudos, in which
-// case it can also change the instruction opcode (but not the operands).
+// case it can also change the instruction opcode.
static Register
-createScratchRegisterForInstruction(MachineInstr &MI,
+createScratchRegisterForInstruction(MachineInstr &MI, unsigned FIOperandNum,
const AArch64InstrInfo *TII) {
// ST*Gloop have a reserved scratch register in operand 1. Use it, and also
// replace the instruction with the writeback variant because it will now
// satisfy the operand constraints for it.
- if (MI.getOpcode() == AArch64::STGloop) {
- MI.setDesc(TII->get(AArch64::STGloop_wback));
- return MI.getOperand(1).getReg();
- } else if (MI.getOpcode() == AArch64::STZGloop) {
- MI.setDesc(TII->get(AArch64::STZGloop_wback));
- return MI.getOperand(1).getReg();
+ Register ScratchReg;
+ if (MI.getOpcode() == AArch64::STGloop ||
+ MI.getOpcode() == AArch64::STZGloop) {
+ assert(FIOperandNum == 3 &&
+ "Wrong frame index operand for STGloop/STZGloop");
+ unsigned Op = MI.getOpcode() == AArch64::STGloop ? AArch64::STGloop_wback
+ : AArch64::STZGloop_wback;
+ ScratchReg = MI.getOperand(1).getReg();
+ MI.getOperand(3).ChangeToRegister(ScratchReg, false, false, true);
+ MI.setDesc(TII->get(Op));
+ MI.tieOperands(1, 3);
} else {
- return MI.getMF()->getRegInfo().createVirtualRegister(
- &AArch64::GPR64RegClass);
+ ScratchReg =
+ MI.getMF()->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ MI.getOperand(FIOperandNum)
+ .ChangeToRegister(ScratchReg, false, false, true);
}
+ return ScratchReg;
}
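// Annotation (not part of this patch): after the rewrite above, operand 3
// (the former frame-index operand, now a use of the scratch register) is tied
// to operand 1 (the scratch def), which the ST*Gloop_wback variants require:
// the same register serves as the incoming address and the written-back
// result. tieOperands(1, 3) records that constraint for the register
// allocator, and ChangeToRegister(ScratchReg, /*isDef=*/false,
// /*isImp=*/false, /*isKill=*/true) marks the operand as the last read.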
void AArch64RegisterInfo::getOffsetOpcodes(
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above. Handle the rest, providing a register that is
// SP+LargeImm.
- Register ScratchReg = createScratchRegisterForInstruction(MI, TII);
+ Register ScratchReg =
+ createScratchRegisterForInstruction(MI, FIOperandNum, TII);
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
- MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
}
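// Annotation (not part of this patch): emitFrameOffset materializes
// ScratchReg = FrameReg + Offset, expanding an out-of-range immediate into a
// short sequence of ADD/SUB instructions as needed. The frame-index operand
// rewrite now happens inside createScratchRegisterForInstruction, so the
// ST*Gloop tied-operand setup and the plain virtual-register case share a
// single path; that is why the trailing ChangeToRegister call was dropped
// here.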
unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
ret void
}
-define void @stg_alloca17() uwtable {
+define void @stg_alloca17() nounwind {
; CHECK-LABEL: stg_alloca17:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #288
-; CHECK-NEXT: .cfi_def_cfa_offset 288
-; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #256
+; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
; CHECK-NEXT: .LBB11_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: st2g sp, [sp], #32
; CHECK-NEXT: cbnz x8, .LBB11_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: stg sp, [sp], #16
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %a = alloca i8, i32 272, align 16
+ call void @llvm.aarch64.settag(i8* %a, i64 272)
+ ret void
+}
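; Annotation (not part of the original test): the function above and the one
; below differ only in the unwind attribute. With nounwind, the SP decrement
; is folded into the tag-store loop ("st2g sp, [sp], #32" ... "stg sp, [sp],
; #16") and no CFI directives are emitted. With uwtable, the loop runs on the
; scratch register x9 instead, SP stays fixed, and the epilogue restores it
; with a single "add sp, sp, #272" followed by .cfi_def_cfa_offset.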
+
+define void @stg_alloca18() uwtable {
+; CHECK-LABEL: stg_alloca18:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #288
+; CHECK-NEXT: .cfi_def_cfa_offset 288
+; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: mov x8, #256
+; CHECK-NEXT: stg x9, [x9], #16
+; CHECK-NEXT: .LBB12_1: // %entry
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sub x8, x8, #32
+; CHECK-NEXT: st2g x9, [x9], #32
+; CHECK-NEXT: cbnz x8, .LBB12_1
+; CHECK-NEXT: // %bb.2: // %entry
+; CHECK-NEXT: add sp, sp, #272
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0