MachineInstr::MIFlag Flag) const {
const LoongArchInstrInfo *TII = STI.getInstrInfo();
bool IsLA64 = STI.is64Bit();
+ unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;
if (DestReg == SrcReg && Val == 0)
return;
if (isInt<12>(Val)) {
// addi.w/d $DstReg, $SrcReg, Val
- BuildMI(MBB, MBBI, DL,
- TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), DestReg)
+ BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg)
.addReg(SrcReg)
.addImm(Val)
.setMIFlag(Flag);
return;
}
- report_fatal_error("adjustReg cannot yet handle adjustments >12 bits");
+ // Try to split the offset across two ADDIs. We need to keep the stack pointer
+ // aligned after each ADDI. We need to determine the maximum value we can put
+ // in each ADDI. In the negative direction, we can use -2048 which is always
+ // sufficiently aligned. In the positive direction, we need to find the
+ // largest 12-bit immediate that is aligned. Exclude -4096 since it can be
+ // created with LU12I.W.
+ assert(getStackAlign().value() < 2048 && "Stack alignment too large");
+ int64_t MaxPosAdjStep = 2048 - getStackAlign().value();
+ if (Val > -4096 && Val <= (2 * MaxPosAdjStep)) {
+ int64_t FirstAdj = Val < 0 ? -2048 : MaxPosAdjStep;
+ Val -= FirstAdj;
+ BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg)
+ .addReg(SrcReg)
+ .addImm(FirstAdj)
+ .setMIFlag(Flag);
+ BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(Val)
+ .setMIFlag(Flag);
+ return;
+ }
+
+ unsigned Opc = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W;
+ if (Val < 0) {
+ Val = -Val;
+ Opc = IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W;
+ }
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
+ TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag);
+ BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
+ .addReg(SrcReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .setMIFlag(Flag);
}
// Determine the size of the frame and maximum call frame size.
#include "LoongArchInstrInfo.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
+#include "MCTargetDesc/LoongArchMatInt.h"
using namespace llvm;
LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI)
: LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN,
- LoongArch::ADJCALLSTACKUP) {}
+ LoongArch::ADJCALLSTACKUP),
+ STI(STI) {} // Retain the subtarget; movImm queries STI.is64Bit().
void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
.addMemOperand(MMO);
}
+// Materializes the integer Val into DstReg.
+//
+// The instruction sequence is obtained from
+// LoongArchMatInt::generateInstSeq(Val) and inserted into MBB before MBBI.
+// Every emitted instruction is given debug location DL and MI flag Flag. The
+// first instruction of the sequence uses LoongArch::R0 as its source register
+// (when it takes one); every later instruction reads DstReg, so the value is
+// built up in place.
+//
+// Calls report_fatal_error when the subtarget is not 64-bit and Val fails
+// the isInt<32> check.
+void LoongArchInstrInfo::movImm(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                const DebugLoc &DL, Register DstReg,
+                                uint64_t Val, MachineInstr::MIFlag Flag) const {
+  Register SrcReg = LoongArch::R0;
+
+  if (!STI.is64Bit() && !isInt<32>(Val))
+    report_fatal_error("Should only materialize 32-bit constants for LA32");
+
+  auto Seq = LoongArchMatInt::generateInstSeq(Val);
+  assert(!Seq.empty());
+
+  for (const auto &Inst : Seq) {
+    switch (Inst.Opc) {
+    case LoongArch::LU12I_W:
+      // Immediate-only form: no source register operand.
+      BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
+          .addImm(Inst.Imm)
+          .setMIFlag(Flag);
+      break;
+    case LoongArch::ADDI_W:
+    case LoongArch::ORI:
+    case LoongArch::LU32I_D: // "rj" is needed due to InstrInfo pattern
+    case LoongArch::LU52I_D:
+      BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
+          .addReg(SrcReg, RegState::Kill)
+          .addImm(Inst.Imm)
+          .setMIFlag(Flag);
+      break;
+    default:
+      // Unlike assert(false && ...), this is not compiled away to a silent
+      // fall-through when assertions are disabled.
+      llvm_unreachable("Unknown insn emitted by LoongArchMatInt");
+    }
+
+    // Only the first instruction has $zero as its source.
+    SrcReg = DstReg;
+  }
+}
+
// Returns the size recorded in MI's instruction descriptor.
unsigned LoongArchInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const auto &Desc = MI.getDesc();
  return Desc.getSize();
}
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ // Materializes the given integer Val into DstReg.
+ void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register DstReg, uint64_t Val,
+ MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
+
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+protected:
+ const LoongArchSubtarget &STI;
};
} // end namespace llvm
#include "LoongArchRegisterInfo.h"
#include "LoongArch.h"
+#include "LoongArchInstrInfo.h"
#include "LoongArchSubtarget.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MI.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const LoongArchSubtarget &STI = MF.getSubtarget<LoongArchSubtarget>();
+ const LoongArchInstrInfo *TII = STI.getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
DebugLoc DL = MI.getDebugLoc();
TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) +
StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
- // Offsets must be encodable with a 12-bit immediate field.
+ bool FrameRegIsKill = false;
+
if (!isInt<12>(Offset.getFixed())) {
- report_fatal_error("Frame offsets outside of the signed 12-bit range is "
- "not supported currently");
+ unsigned Addi = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+ unsigned Add = STI.is64Bit() ? LoongArch::ADD_D : LoongArch::ADD_W;
+
+ // The offset won't fit in an immediate, so use a scratch register instead.
+ // Modify Offset and FrameReg appropriately.
+ Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
+ TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed());
+ if (MI.getOpcode() == Addi) {
+ BuildMI(MBB, II, DL, TII->get(Add), MI.getOperand(0).getReg())
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ MI.eraseFromParent();
+ return;
+ }
+ BuildMI(MBB, II, DL, TII->get(Add), ScratchReg)
+ .addReg(FrameReg)
+ .addReg(ScratchReg, RegState::Kill);
+ Offset = StackOffset::getFixed(0);
+ FrameReg = ScratchReg;
+ FrameRegIsKill = true;
}
- MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum)
+ .ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
}
RegScavenger *RS = nullptr) const override;
Register getFrameRegister(const MachineFunction &MF) const override;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override {
+ return true; // Always enabled; MF is not consulted.
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
+ return true; // Always enabled; MF is not consulted.
+ }
};
} // end namespace llvm
%struct.key_t = type { i32, [16 x i8] }
+declare void @llvm.memset.p0i8.i64(ptr, i8, i64, i1)
+declare void @test1(ptr)
+
define i32 @test() nounwind {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
ret i32 0
}
-declare void @llvm.memset.p0i8.i64(ptr, i8, i64, i1)
+;; 2032 fits a signed 12-bit immediate, so only one SP-adjusting addi per adjustment.
+define void @test_large_frame_size_2032() {
+; CHECK-LABEL: test_large_frame_size_2032:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -2032
+; CHECK-NEXT: .cfi_def_cfa_offset 2032
+; CHECK-NEXT: addi.d $sp, $sp, 2032
+; CHECK-NEXT: ret
+ %1 = alloca i8, i32 2032
+ ret void
+}
-declare void @test1(ptr)
+;; -2048 fits a signed 12-bit immediate but +2048 does not: expect one addi when
+;; adjusting SP down, and two addi's (2032 + 16) when adjusting SP back up.
+define void @test_large_frame_size_2048() {
+; CHECK-LABEL: test_large_frame_size_2048:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -2048
+; CHECK-NEXT: .cfi_def_cfa_offset 2048
+; CHECK-NEXT: addi.d $sp, $sp, 2032
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ret
+ %1 = alloca i8, i32 2048
+ ret void
+}
+
+;; Neither -2064 nor 2064 fits a signed 12-bit immediate: two SP-adjusting addi's per adjustment.
+define void @test_large_frame_size_2064() {
+; CHECK-LABEL: test_large_frame_size_2064:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -2048
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 2064
+; CHECK-NEXT: addi.d $sp, $sp, 2032
+; CHECK-NEXT: addi.d $sp, $sp, 32
+; CHECK-NEXT: ret
+ %1 = alloca i8, i32 2064
+ ret void
+}
+
+;; The 1234576-byte frame is too large for addi's: SP should be adjusted with the help of a scratch register.
+define void @test_large_frame_size_1234576() {
+; CHECK-LABEL: test_large_frame_size_1234576:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a0, 301
+; CHECK-NEXT: ori $a0, $a0, 1680
+; CHECK-NEXT: sub.d $sp, $sp, $a0
+; CHECK-NEXT: .cfi_def_cfa_offset 1234576
+; CHECK-NEXT: lu12i.w $a0, 301
+; CHECK-NEXT: ori $a0, $a0, 1680
+; CHECK-NEXT: add.d $sp, $sp, $a0
+; CHECK-NEXT: ret
+ %1 = alloca i8, i32 1234567 ; frame size becomes 1234576, per the .cfi_def_cfa_offset directive above
+ ret void
+}