}
}
-static bool isPairedLdSt(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- return false;
- case AArch64::LDPSi:
- case AArch64::LDPSWi:
- case AArch64::LDPDi:
- case AArch64::LDPQi:
- case AArch64::LDPWi:
- case AArch64::LDPXi:
- case AArch64::STPSi:
- case AArch64::STPDi:
- case AArch64::STPQi:
- case AArch64::STPWi:
- case AArch64::STPXi:
- case AArch64::STGPi:
- return true;
- }
-}
-
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
unsigned OpcA = FirstMI.getOpcode();
// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
int &MinOffset, int &MaxOffset) {
- bool IsPaired = isPairedLdSt(MI);
+ bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
bool IsTagStore = isTagStore(MI);
// ST*G and all paired ldst have the same scale in pre/post-indexed variants
// as in the "unsigned offset" variant.
bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
if (IsPreLdSt)
PairedRegOp += 1;
- unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
- return MI.getOperand(Idx);
-}
-
-static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
- unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2 : 1;
- return MI.getOperand(Idx);
-}
-
-static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
- unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 : 2;
+ unsigned Idx =
+ AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
return MI.getOperand(Idx);
}
assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
int LoadSize = TII->getMemScale(LoadInst);
int StoreSize = TII->getMemScale(StoreInst);
- int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst)
- ? getLdStOffsetOp(StoreInst).getImm()
- : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
- int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst)
- ? getLdStOffsetOp(LoadInst).getImm()
- : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
+ int UnscaledStOffset =
+ TII->hasUnscaledLdStOffset(StoreInst)
+ ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
+ : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
+ int UnscaledLdOffset =
+ TII->hasUnscaledLdStOffset(LoadInst)
+ ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
+ : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
return (UnscaledStOffset <= UnscaledLdOffset) &&
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
case AArch64::STPWi:
case AArch64::STPXi:
// Make sure this is a reg+imm (as opposed to an address reloc).
- if (!getLdStOffsetOp(MI).isImm())
+ if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
return false;
return true;
// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
const MachineOperand &BaseRegOp =
- MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);
+ MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
+ : AArch64InstrInfo::getLdStBaseOp(*I);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI;
- if (getLdStOffsetOp(*I).getImm() ==
- getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
+ if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
+ AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
RtMI = &*MergeMI;
else
RtMI = &*I;
- int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
+ int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
// Change the scaled offset from small to large type.
if (IsScaled) {
assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
const MachineOperand &BaseRegOp =
- MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);
+ MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
+ : AArch64InstrInfo::getLdStBaseOp(*I);
- int Offset = getLdStOffsetOp(*I).getImm();
- int PairedOffset = getLdStOffsetOp(*Paired).getImm();
+ int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
+ int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
if (IsUnscaled != PairedIsUnscaled) {
// We're trying to pair instructions that differ in how they are scaled. If
RtMI = &*I;
Rt2MI = &*Paired;
}
- int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
+ int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
// Scale the immediate offset, if necessary.
if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
"Unsupported ld/st match");
assert(LoadSize <= StoreSize && "Invalid load size");
- int UnscaledLdOffset = IsUnscaled
- ? getLdStOffsetOp(*LoadI).getImm()
- : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
- int UnscaledStOffset = IsUnscaled
- ? getLdStOffsetOp(*StoreI).getImm()
- : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
+ int UnscaledLdOffset =
+ IsUnscaled
+ ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
+ : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
+ int UnscaledStOffset =
+ IsUnscaled
+ ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
+ : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
int Width = LoadSize * 8;
Register DestReg =
IsStoreXReg ? Register(TRI->getMatchingSuperReg(
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator MBBI = I;
MachineInstr &LoadMI = *I;
- Register BaseReg = getLdStBaseOp(LoadMI).getReg();
+ Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();
// If the load is the first instruction in the block, there's obviously
// not any matching store.
// Also we can't handle stores without an immediate offset operand,
// while the operand might be the address for a global variable.
if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
- BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&
+ BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
+ AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
StoreI = MBBI;
bool MayLoad = FirstMI.mayLoad();
bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
Register Reg = getLdStRegOp(FirstMI).getReg();
- Register BaseReg = getLdStBaseOp(FirstMI).getReg();
- int Offset = getLdStOffsetOp(FirstMI).getImm();
+ Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
+ int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
Flags.setSExtIdx(-1);
if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
- getLdStOffsetOp(MI).isImm()) {
+ AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
assert(MI.mayLoadOrStore() && "Expected memory operation.");
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// check for +1/-1. Make sure to check the new instruction offset is
// actually an immediate and not a symbolic reference destined for
// a relocation.
- Register MIBaseReg = getLdStBaseOp(MI).getReg();
- int MIOffset = getLdStOffsetOp(MI).getImm();
+ Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
+ int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
if (IsUnscaled != MIIsUnscaled) {
// We're trying to pair instructions that differ in how they are scaled.
// can't be paired: bail and keep looking.
if (IsPreLdSt) {
bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
- bool IsBaseRegUsed =
- !UsedRegUnits.available(getLdStBaseOp(MI).getReg());
- bool IsBaseRegModified =
- !ModifiedRegUnits.available(getLdStBaseOp(MI).getReg());
+ bool IsBaseRegUsed = !UsedRegUnits.available(
+ AArch64InstrInfo::getLdStBaseOp(MI).getReg());
+ bool IsBaseRegModified = !ModifiedRegUnits.available(
+ AArch64InstrInfo::getLdStBaseOp(MI).getReg());
// If the stored value and the address of the second instruction is
// the same, it needs to be using the updated register and therefore
// it must not be folded.
- bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(),
- getLdStBaseOp(MI).getReg());
+ bool IsMIRegTheSame =
+ TRI->regsOverlap(getLdStRegOp(MI).getReg(),
+ AArch64InstrInfo::getLdStBaseOp(MI).getReg());
if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
IsMIRegTheSame) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
!(MI.getFlag(MachineInstr::FrameSetup) ||
MI.getFlag(MachineInstr::FrameDestroy)) ||
- getLdStBaseOp(MI).getReg() != AArch64::SP)
+ AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP)
return End;
const MachineFunction &MF = *MI.getParent()->getParent();
MachineInstrBuilder MIB;
int Scale, MinOffset, MaxOffset;
getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
- if (!isPairedLdSt(*I)) {
+ if (!AArch64InstrInfo::isPairedLdSt(*I)) {
// Non-paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.add(getLdStRegOp(*Update))
.add(getLdStRegOp(*I))
- .add(getLdStBaseOp(*I))
+ .add(AArch64InstrInfo::getLdStBaseOp(*I))
.addImm(Value / Scale)
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
.add(getLdStRegOp(*Update))
.add(getLdStRegOp(*I, 0))
.add(getLdStRegOp(*I, 1))
- .add(getLdStBaseOp(*I))
+ .add(AArch64InstrInfo::getLdStBaseOp(*I))
.addImm(Value / Scale)
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;
- Register BaseReg = getLdStBaseOp(MemMI).getReg();
- int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
+ Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
+ int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
+ TII->getMemScale(MemMI);
// Scan forward looking for post-index opportunities. Updating instructions
// can't be formed if the memory instruction doesn't have the offset we're
// behavior in this case unlike normal stores, and always performs writeback
// after reading the source register value.
if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
- bool IsPairedInsn = isPairedLdSt(MemMI);
+ bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
Register DestReg = getLdStRegOp(MemMI, i).getReg();
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
MachineBasicBlock::iterator MBBI = I;
MachineFunction &MF = *MemMI.getMF();
- Register BaseReg = getLdStBaseOp(MemMI).getReg();
- int Offset = getLdStOffsetOp(MemMI).getImm();
+ Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
+ int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.
// If the base register overlaps a destination register, we can't
// merge the update.
if (!isTagStore(MemMI)) {
- bool IsPairedInsn = isPairedLdSt(MemMI);
+ bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
Register DestReg = getLdStRegOp(MemMI, i).getReg();
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
// Make sure this is a reg+imm.
// FIXME: It is possible to extend it to handle reg+reg cases.
- if (!getLdStOffsetOp(MI).isImm())
+ if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
return false;
// Look backward up to LdStLimit instructions.
// range, plus allow an extra one in case we find a later insn that matches
// with Offset-1)
bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
- int Offset = getLdStOffsetOp(MI).getImm();
+ int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
// Allow one more for offset.
if (Offset > 0)
// The immediate in the load/store is scaled by the size of the memory
// operation. The immediate in the add we're looking for,
// however, is not, so adjust here.
- int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
+ int UnscaledOffset =
+ AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
// Look forward to try to find a pre-index instruction. For example,
// ldr x1, [x0, #64]
//===----------------------------------------------------------------------===//
#include "AArch64MachineScheduler.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
using namespace llvm;
+// Return true if \p MI is a Q-register store that is a candidate for
+// offset-based reordering: STURQi/STRQui (only on subtargets that do not
+// already prefer ascending store addresses) or STPQi, provided its offset
+// operand is a plain immediate rather than a symbolic address/relocation.
+static bool needReorderStoreMI(const MachineInstr *MI) {
+  if (!MI)
+    return false;
+
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+  case AArch64::STURQi:
+  case AArch64::STRQui:
+    // Hardware that already ascends store addresses needs no reordering.
+    if (MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend())
+      return false;
+    LLVM_FALLTHROUGH;
+  case AArch64::STPQi:
+    // Reorder only reg+imm forms; symbolic offsets cannot be compared.
+    return AArch64InstrInfo::getLdStOffsetOp(*MI).isImm();
+  }
+}
+
+// Return true if the two stores may have overlapping writes. On return,
+// \p Off0 and \p Off1 hold the byte offsets of MI0/MI1 from the base.
+static bool mayOverlapWrite(const MachineInstr &MI0, const MachineInstr &MI1,
+                            int64_t &Off0, int64_t &Off1) {
+  const MachineOperand &Base0 = AArch64InstrInfo::getLdStBaseOp(MI0);
+  const MachineOperand &Base1 = AArch64InstrInfo::getLdStBaseOp(MI1);
+
+  // Conservatively assume the writes may overlap when the two stores do
+  // not use an identical base operand.
+  if (!Base0.isIdenticalTo(Base1))
+    return true;
+
+  // Normalize both offsets to bytes: scaled-offset forms encode the
+  // immediate in units of the access size, unscaled forms use bytes.
+  int StoreSize0 = AArch64InstrInfo::getMemScale(MI0);
+  int StoreSize1 = AArch64InstrInfo::getMemScale(MI1);
+  Off0 = AArch64InstrInfo::hasUnscaledLdStOffset(MI0.getOpcode())
+             ? AArch64InstrInfo::getLdStOffsetOp(MI0).getImm()
+             : AArch64InstrInfo::getLdStOffsetOp(MI0).getImm() * StoreSize0;
+  Off1 = AArch64InstrInfo::hasUnscaledLdStOffset(MI1.getOpcode())
+             ? AArch64InstrInfo::getLdStOffsetOp(MI1).getImm()
+             : AArch64InstrInfo::getLdStOffsetOp(MI1).getImm() * StoreSize1;
+
+  // The lower-addressed store bounds the overlap; a paired store writes
+  // two registers, i.e. twice the single-access size.
+  const MachineInstr &MI = (Off0 < Off1) ? MI0 : MI1;
+  int Multiples = AArch64InstrInfo::isPairedLdSt(MI) ? 2 : 1;
+  int StoreSize = AArch64InstrInfo::getMemScale(MI) * Multiples;
+
+  return llabs(Off0 - Off1) < StoreSize;
+}
+
bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand) {
bool OriginalResult = PostGenericScheduler::tryCandidate(Cand, TryCand);
if (Cand.isValid()) {
MachineInstr *Instr0 = TryCand.SU->getInstr();
MachineInstr *Instr1 = Cand.SU->getInstr();
- // When dealing with two STPqi's.
- if (Instr0 && Instr1 && Instr0->getOpcode() == Instr1->getOpcode () &&
- Instr0->getOpcode() == AArch64::STPQi)
- {
- MachineOperand &Base0 = Instr0->getOperand(2);
- MachineOperand &Base1 = Instr1->getOperand(2);
- int64_t Off0 = Instr0->getOperand(3).getImm();
- int64_t Off1 = Instr1->getOperand(3).getImm();
- // With the same base address and non-overlapping writes.
- if (Base0.isIdenticalTo(Base1) && llabs (Off0 - Off1) >= 2) {
- TryCand.Reason = NodeOrder;
- // Order them by ascending offsets.
- return Off0 < Off1;
- }
+
+ if (!needReorderStoreMI(Instr0) || !needReorderStoreMI(Instr1))
+ return OriginalResult;
+
+ int64_t Off0, Off1;
+ // With the same base address and non-overlapping writes.
+ if (!mayOverlapWrite(*Instr0, *Instr1, Off0, Off1)) {
+ TryCand.Reason = NodeOrder;
+ // Order them by ascending offsets.
+ return Off0 < Off1;
}
}