// Use !IsLiveIn for the kill flag.
// We do not want to kill registers that are live in this function
// before their use because they will become undefined registers.
- TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
- CSI[i].getFrameIdx(), RC, TRI);
+ // Functions without NoUnwind need to preserve the order of elements in
+ // saved vector registers.
+ if (Subtarget.needsSwapsForVSXMemOps() &&
+ !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
+ TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
+ CSI[i].getFrameIdx(), RC, TRI);
+ else
+ TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
+ RC, TRI);
}
}
}
} else {
      // Default behavior for non-CR restores.
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+
+ // Functions without NoUnwind need to preserve the order of elements in
+ // saved vector registers.
+ if (Subtarget.needsSwapsForVSXMemOps() &&
+ !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
+ TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
+ TRI);
+ else
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+
assert(I != MBB.begin() &&
"loadRegFromStackSlot didn't insert any code!");
}
FuncInfo->setHasNonRISpills();
}
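
(The same NoUnwind/needsSwapsForVSXMemOps guard appears on both the spill and reload paths above. A minimal sketch of that dispatch condition, using a hypothetical helper name that is not part of the patch:

// Illustrative only: restates the guard used on both paths above; this
// helper itself does not exist in the patch.
static bool mustPreserveElementOrder(const MachineFunction &MF,
                                     const PPCSubtarget &Subtarget) {
  // Little-endian VSX memory ops swap doublewords, so any function that
  // may unwind (i.e. lacks the NoUnwind attribute) must spill and reload
  // through the order-preserving *NoUpd entry points.
  return Subtarget.needsSwapsForVSXMemOps() &&
         !MF.getFunction().hasFnAttribute(Attribute::NoUnwind);
}
)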
-void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- Register SrcReg, bool isKill,
- int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void PPCInstrInfo::storeRegToStackSlotNoUpd(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg,
+ bool isKill, int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr *, 4> NewMIs;
- // We need to avoid a situation in which the value from a VRRC register is
- // spilled using an Altivec instruction and reloaded into a VSRC register
- // using a VSX instruction. The issue with this is that the VSX
- // load/store instructions swap the doublewords in the vector and the Altivec
- // ones don't. The register classes on the spill/reload may be different if
- // the register is defined using an Altivec instruction and is then used by a
- // VSX instruction.
- RC = updatedRC(RC);
-
StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
  for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
    MBB.insert(MI, NewMIs[i]);

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIdx),
      MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
      MFI.getObjectAlign(FrameIdx));
  NewMIs.back()->addMemOperand(MF, MMO);
}
+void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ Register SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ // We need to avoid a situation in which the value from a VRRC register is
+ // spilled using an Altivec instruction and reloaded into a VSRC register
+ // using a VSX instruction. The issue with this is that the VSX
+ // load/store instructions swap the doublewords in the vector and the Altivec
+ // ones don't. The register classes on the spill/reload may be different if
+ // the register is defined using an Altivec instruction and is then used by a
+ // VSX instruction.
+ RC = updatedRC(RC);
+ storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
+}
+
void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
FuncInfo->setHasNonRISpills();
}
-void
-PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- Register DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void PPCInstrInfo::loadRegFromStackSlotNoUpd(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg,
+ int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setHasSpills();
- // We need to avoid a situation in which the value from a VRRC register is
- // spilled using an Altivec instruction and reloaded into a VSRC register
- // using a VSX instruction. The issue with this is that the VSX
- // load/store instructions swap the doublewords in the vector and the Altivec
- // ones don't. The register classes on the spill/reload may be different if
- // the register is defined using an Altivec instruction and is then used by a
- // VSX instruction.
- if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
- RC = &PPC::VSRCRegClass;
-
LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
  for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
    MBB.insert(MI, NewMIs[i]);

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIdx),
      MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
      MFI.getObjectAlign(FrameIdx));
  NewMIs.back()->addMemOperand(MF, MMO);
}
+void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ Register DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ // We need to avoid a situation in which the value from a VRRC register is
+ // spilled using an Altivec instruction and reloaded into a VSRC register
+ // using a VSX instruction. The issue with this is that the VSX
+ // load/store instructions swap the doublewords in the vector and the Altivec
+ // ones don't. The register classes on the spill/reload may be different if
+ // the register is defined using an Altivec instruction and is then used by a
+ // VSX instruction.
+ RC = updatedRC(RC);
+
+ loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
+}
+
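
(For reference, the updatedRC() hook called by both wrappers performs exactly the register-class widening that the removed inline check above did. A sketch reconstructed from that check; the real definition lives elsewhere in PPCInstrInfo.cpp:

const TargetRegisterClass *
PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
  // Widen VRRC to VSRC when VSX is available so the value is usable by
  // either instruction set; the NoUpd entry points deliberately skip
  // this step to keep the Altivec spill/reload instructions.
  if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
    return &PPC::VSRCRegClass;
  return RC;
}
)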
bool PPCInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ // Emits a register spill without updating the register class for vector
+ // registers. This ensures that when we spill a vector register the
+ // element order in the register is the same as it was in memory.
+ void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ // Emits a register reload without updating the register class for vector
+ // registers. This ensures that when we reload a vector register the
+ // element order in the register is the same as it was in memory.
+ void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
unsigned getStoreOpcodeForSpill(unsigned Reg,
const TargetRegisterClass *RC = nullptr) const;
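
(The test updates below show the effect: unwind-aware functions now spill with stvx/lvx instead of stxvd2x/lxvd2x. VSX registers vs32-vs63 alias Altivec v0-v31, hence vs60-vs63 becoming v28-v31 in the CHECK-DAG lines. As a rough model of why the instruction choice matters, assuming the little-endian doubleword swap described in the comments above:

// Illustration only: not part of the patch. Models how the same vector
// register lands in a 16-byte spill slot under each store instruction.
#include <array>
#include <cstdint>

using Slot = std::array<uint64_t, 2>; // two doublewords of a spill slot

// stvx: memory order matches the register's element order, so an
// unwinder reading the slot directly sees the elements as expected.
Slot storeWithStvx(Slot Reg) { return Reg; }

// stxvd2x without a compensating xxswapd: the doublewords are written
// in the opposite order, which only a swap-aware reader can undo.
Slot storeWithStxvd2x(Slot Reg) { return {Reg[1], Reg[0]}; }
)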
; CHECK-PWR8-NEXT: .cfi_offset v20, -192
; CHECK-PWR8-NEXT: .cfi_offset v21, -176
; CHECK-PWR8-NEXT: li r5, 48
-; CHECK-PWR8-NEXT: stxvd2x v20, r1, r5 # 16-byte Folded Spill
+; CHECK-PWR8-NEXT: stvx v20, r1, r5 # 16-byte Folded Spill
; CHECK-PWR8-NEXT: li r5, 64
-; CHECK-PWR8-NEXT: stxvd2x v21, r1, r5 # 16-byte Folded Spill
+; CHECK-PWR8-NEXT: stvx v21, r1, r5 # 16-byte Folded Spill
; CHECK-PWR8-NEXT: #APP
; CHECK-PWR8-NEXT: add r3, r3, r4
; CHECK-PWR8-NEXT: #NO_APP
; CHECK-PWR8-NEXT: bl callee
; CHECK-PWR8-NEXT: nop
; CHECK-PWR8-NEXT: li r4, 64
-; CHECK-PWR8-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload
+; CHECK-PWR8-NEXT: lvx v21, r1, r4 # 16-byte Folded Reload
; CHECK-PWR8-NEXT: li r4, 48
-; CHECK-PWR8-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload
+; CHECK-PWR8-NEXT: lvx v20, r1, r4 # 16-byte Folded Reload
; CHECK-PWR8-NEXT: addi r1, r1, 240
; CHECK-PWR8-NEXT: ld r0, 16(r1)
; CHECK-PWR8-NEXT: mtlr r0
; CHECK-PWR8-NEXT: .cfi_offset v20, -192
; CHECK-PWR8-NEXT: .cfi_offset v21, -176
; CHECK-PWR8-NEXT: li r5, 48
-; CHECK-PWR8-NEXT: stxvd2x v20, r1, r5 # 16-byte Folded Spill
+; CHECK-PWR8-NEXT: stvx v20, r1, r5 # 16-byte Folded Spill
; CHECK-PWR8-NEXT: li r5, 64
-; CHECK-PWR8-NEXT: stxvd2x v21, r1, r5 # 16-byte Folded Spill
+; CHECK-PWR8-NEXT: stvx v21, r1, r5 # 16-byte Folded Spill
; CHECK-PWR8-NEXT: #APP
; CHECK-PWR8-NEXT: add r3, r3, r4
; CHECK-PWR8-NEXT: #NO_APP
; CHECK-PWR8-NEXT: bl callee
; CHECK-PWR8-NEXT: nop
; CHECK-PWR8-NEXT: li r4, 64
-; CHECK-PWR8-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload
+; CHECK-PWR8-NEXT: lvx v21, r1, r4 # 16-byte Folded Reload
; CHECK-PWR8-NEXT: li r4, 48
-; CHECK-PWR8-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload
+; CHECK-PWR8-NEXT: lvx v20, r1, r4 # 16-byte Folded Reload
; CHECK-PWR8-NEXT: addi r1, r1, 240
; CHECK-PWR8-NEXT: ld r0, 16(r1)
; CHECK-PWR8-NEXT: mtlr r0
; CHECK-PWR8-NEXT: li r5, 48
; CHECK-PWR8-NEXT: std r14, 240(r1) # 8-byte Folded Spill
; CHECK-PWR8-NEXT: stfd f14, 384(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: stxvd2x v20, r1, r5 # 16-byte Folded Spill
+; CHECK-PWR8-NEXT: stvx v20, r1, r5 # 16-byte Folded Spill
; CHECK-PWR8-NEXT: li r5, 64
-; CHECK-PWR8-NEXT: stxvd2x v21, r1, r5 # 16-byte Folded Spill
+; CHECK-PWR8-NEXT: stvx v21, r1, r5 # 16-byte Folded Spill
; CHECK-PWR8-NEXT: #APP
; CHECK-PWR8-NEXT: add r3, r3, r4
; CHECK-PWR8-NEXT: #NO_APP
; CHECK-PWR8-NEXT: li r4, 64
; CHECK-PWR8-NEXT: lfd f14, 384(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: ld r14, 240(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload
+; CHECK-PWR8-NEXT: lvx v21, r1, r4 # 16-byte Folded Reload
; CHECK-PWR8-NEXT: li r4, 48
-; CHECK-PWR8-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload
+; CHECK-PWR8-NEXT: lvx v20, r1, r4 # 16-byte Folded Reload
; CHECK-PWR8-NEXT: addi r1, r1, 528
; CHECK-PWR8-NEXT: ld r0, 16(r1)
; CHECK-PWR8-NEXT: mtlr r0
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset v20, -192
; CHECK-NEXT: li r5, 48
-; CHECK-NEXT: stxvd2x v20, r1, r5 # 16-byte Folded Spill
+; CHECK-NEXT: stvx v20, r1, r5 # 16-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: add r3, r3, r4
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: bl callee
; CHECK-NEXT: nop
; CHECK-NEXT: li r4, 48
-; CHECK-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload
+; CHECK-NEXT: lvx v20, r1, r4 # 16-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 240
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-DAG: li [[REG64:[0-9]+]], 64
; CHECK-DAG: li [[REG80:[0-9]+]], 80
; CHECK-DAG: li [[REG96:[0-9]+]], 96
-; CHECK-DAG: stxvd2x 60, 1, [[REG48]] # 16-byte Folded Spill
-; CHECK-DAG: stxvd2x 61, 1, [[REG64]] # 16-byte Folded Spill
-; CHECK-DAG: stxvd2x 62, 1, [[REG80]] # 16-byte Folded Spill
-; CHECK-DAG: stxvd2x 63, 1, [[REG96]] # 16-byte Folded Spill
+; CHECK-DAG: stvx 28, 1, [[REG48]] # 16-byte Folded Spill
+; CHECK-DAG: stvx 29, 1, [[REG64]] # 16-byte Folded Spill
+; CHECK-DAG: stvx 30, 1, [[REG80]] # 16-byte Folded Spill
+; CHECK-DAG: stvx 31, 1, [[REG96]] # 16-byte Folded Spill
; CHECK: .LBB0_3
; CHECK-DAG: li [[REG96_LD:[0-9]+]], 96
; CHECK-DAG: li [[REG80_LD:[0-9]+]], 80
; CHECK-DAG: li [[REG64_LD:[0-9]+]], 64
; CHECK-DAG: li [[REG48_LD:[0-9]+]], 48
-; CHECK-DAG: lxvd2x 63, 1, [[REG96_LD]] # 16-byte Folded Reload
-; CHECK-DAG: lxvd2x 62, 1, [[REG80_LD]] # 16-byte Folded Reload
-; CHECK-DAG: lxvd2x 61, 1, [[REG64_LD]] # 16-byte Folded Reload
-; CHECK-DAG: lxvd2x 60, 1, [[REG48_LD]] # 16-byte Folded Reload
+; CHECK-DAG: lvx 31, 1, [[REG96_LD]] # 16-byte Folded Reload
+; CHECK-DAG: lvx 30, 1, [[REG80_LD]] # 16-byte Folded Reload
+; CHECK-DAG: lvx 29, 1, [[REG64_LD]] # 16-byte Folded Reload
+; CHECK-DAG: lvx 28, 1, [[REG48_LD]] # 16-byte Folded Reload
; CHECK: mtlr 0
; CHECK-NEXT: blr
;