/// It returns false if no base operands and offset were found.
/// It is not guaranteed to recognize base operands and offsets in all
/// cases.
- virtual bool
- getMemOperandsWithOffset(const MachineInstr &MI,
- SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable,
- const TargetRegisterInfo *TRI) const {
+ virtual bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
return false;
}
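For orientation only, a minimal sketch of how an out-of-tree target might implement this hook for a simple reg+imm addressing form; the target name `Foo`, the operand layout, and the use of memoperands() for the width are assumptions for illustration, not part of this patch:

bool FooInstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  // Only handle loads/stores of the form "op dst, [base, #imm]".
  if (!MI.mayLoadOrStore() || MI.getNumExplicitOperands() < 3 ||
      !MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
    return false;
  BaseOps.push_back(&MI.getOperand(1)); // Base register operand.
  Offset = MI.getOperand(2).getImm();   // Immediate byte offset.
  OffsetIsScalable = false;             // No vscale-scaled addressing here.
  // Report the access size in bytes; fall back to 0 if it is unknown.
  Width = MI.memoperands_empty() ? 0 : MI.memoperands().front()->getSize();
  return true;
}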
/// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
/// \p NumLoads is the number of loads that will be in the cluster if this
/// hook returns true.
+ /// \p NumBytes is the number of bytes that will be loaded from all the
+ /// clustered loads if this hook returns true.
virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads) const {
+ unsigned NumLoads, unsigned NumBytes) const {
llvm_unreachable("target did not implement shouldClusterMemOps()");
}
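Likewise, a sketch of how a target override might use the new `NumBytes` argument alongside `NumLoads`; the limits of 4 ops and 32 bytes are made-up illustration values, not taken from any in-tree target:

bool FooInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                                       ArrayRef<const MachineOperand *> BaseOps2,
                                       unsigned NumLoads,
                                       unsigned NumBytes) const {
  // Only cluster simple accesses that share a single register base operand.
  if (BaseOps1.size() != 1 || BaseOps2.size() != 1 ||
      !BaseOps1.front()->isReg() || !BaseOps2.front()->isReg() ||
      BaseOps1.front()->getReg() != BaseOps2.front()->getReg())
    return false;
  // Cap both the number of ops and the total bytes in the cluster.
  return NumLoads <= 4 && NumBytes <= 32;
}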
SUnit *SU;
SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
+ unsigned Width;
MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
- int64_t Offset)
- : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset) {}
+ int64_t Offset, unsigned Width)
+ : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
+ Width(Width) {}
static bool Compare(const MachineOperand *const &A,
const MachineOperand *const &B) {
ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
for (SUnit *SU : MemOps) {
+ const MachineInstr &MI = *SU->getInstr();
SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
bool OffsetIsScalable;
- if (TII->getMemOperandsWithOffset(*SU->getInstr(), BaseOps, Offset,
- OffsetIsScalable, TRI))
- MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset));
+ unsigned Width;
+ if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
+ OffsetIsScalable, Width, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
#ifndef NDEBUG
for (auto *Op : BaseOps)
assert(Op);
// At this point, the `MemOpRecords` array must hold at least two mem ops. Try to
// cluster the mem ops collected in `MemOpRecords`.
unsigned ClusterLength = 1;
+ unsigned CurrentClusterBytes = MemOpRecords[0].Width;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
// The decision to cluster mem ops is based on target-dependent logic.
auto MemOpa = MemOpRecords[Idx];
auto MemOpb = MemOpRecords[Idx + 1];
++ClusterLength;
- if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps,
- ClusterLength)) {
+ CurrentClusterBytes += MemOpb.Width;
+ if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
+ CurrentClusterBytes)) {
// The current pair of mem ops could not be clustered; reset the cluster
// length and byte count, and move on to the next pair.
ClusterLength = 1;
+ CurrentClusterBytes = MemOpb.Width;
continue;
}
// FIXME: Is this check really required?
if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
ClusterLength = 1;
+ CurrentClusterBytes = MemOpb.Width;
continue;
}
const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
SmallVector<const MachineOperand *, 4> BaseOps;
- if (!getMemOperandsWithOffset(MI, BaseOps, Offset, OffsetIsScalable, TRI) ||
+ unsigned Width;
+ if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable,
+ Width, TRI) ||
BaseOps.size() != 1)
return false;
BaseOp = BaseOps.front();
return true;
}
-bool AArch64InstrInfo::getMemOperandsWithOffset(
+bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI)
- const {
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
const MachineOperand *BaseOp;
- unsigned Width;
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
Width, TRI))
return false;
/// Only called for LdSt for which getMemOperandWithOffset returns true.
bool AArch64InstrInfo::shouldClusterMemOps(
ArrayRef<const MachineOperand *> BaseOps1,
- ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads) const {
+ ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+ unsigned NumBytes) const {
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
const MachineOperand &BaseOp1 = *BaseOps1.front();
const MachineOperand &BaseOp2 = *BaseOps2.front();
/// Hint that pairing the given load or store is unprofitable.
static void suppressLdStPair(MachineInstr &MI);
- bool getMemOperandsWithOffset(
+ bool getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI)
- const override;
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
/// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
/// This is true for some SVE instructions like ldr/str that have a
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads) const override;
+ unsigned NumLoads, unsigned NumBytes) const override;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
int64_t Dummy1;
bool Dummy2;
+ unsigned Dummy3;
SmallVector<const MachineOperand *, 4> BaseOps;
if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
- if (!SII->getMemOperandsWithOffset(MI, BaseOps, Dummy1, Dummy2,
- TRI)) {
+ if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
+ Dummy3, TRI)) {
// We failed to get the base operands, so we'll never clause this
// instruction with any other; pretend it's illegal.
Type = HARDCLAUSE_ILLEGAL;
// scheduler it limits the size of the cluster to avoid increasing
// register pressure too much, but this pass runs after register
// allocation so there is no need for that kind of limit.
- !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2)))) {
+ !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
// Finish the current clause.
Changed |= emitClause(CI, SII);
CI = ClauseInfo();
}
}
-bool SIInstrInfo::getMemOperandsWithOffset(
+unsigned SIInstrInfo::getOperandSizeInBytes(const MachineInstr &LdSt,
+ const MachineOperand *MOp) const {
+ assert(MOp && "Unexpected null machine operand!");
+ const MachineRegisterInfo &MRI = LdSt.getParent()->getParent()->getRegInfo();
+ const Register Reg = MOp->getReg();
+ const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg)
+ ? MRI.getRegClass(Reg)
+ : RI.getPhysRegClass(Reg);
+ return (RI.getRegSizeInBits(*DstRC) / 8);
+}
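A brief, hypothetical illustration of what the new helper reports; `TII` and `MI` are assumed locals, and a DS write2-style instruction carrying both data operands is just an example:

// For a DS write2-style instruction that has both data0 and data1, the total
// width is the sum of the two data operands' register sizes in bytes.
const MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
const MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
unsigned Bytes = TII->getOperandSizeInBytes(MI, Data0) +
                 TII->getOperandSizeInBytes(MI, Data1);
// e.g. two 32-bit VGPR data operands -> Bytes == 8.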
+
+bool SIInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI)
- const {
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
unsigned Opc = LdSt.getOpcode();
OffsetIsScalable = false;
- const MachineOperand *BaseOp, *OffsetOp;
+ const MachineOperand *BaseOp, *OffsetOp, *MOp;
if (isDS(LdSt)) {
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
}
BaseOps.push_back(BaseOp);
Offset = OffsetOp->getImm();
+ // Get appropriate operand, and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+ if (!MOp)
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::data0);
+ Width = getOperandSizeInBytes(LdSt, MOp);
} else {
// The 2 offset instructions use offset0 and offset1 instead. We can treat
// these as a load with a single offset if the 2 offsets are consecutive.
BaseOps.push_back(BaseOp);
Offset = EltSize * Offset0;
+ // Get appropriate operand(s), and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+ if (!MOp) {
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::data0);
+ Width = getOperandSizeInBytes(LdSt, MOp);
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::data1);
+ Width += getOperandSizeInBytes(LdSt, MOp);
+ } else {
+ Width = getOperandSizeInBytes(LdSt, MOp);
+ }
}
return true;
}
BaseOps.push_back(RSrc);
BaseOps.push_back(SOffset);
Offset = OffsetImm->getImm();
+ // Get appropriate operand, and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+ if (!MOp)
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdata);
+ Width = getOperandSizeInBytes(LdSt, MOp);
return true;
}
Offset = OffsetImm->getImm();
if (SOffset) // soffset can be an inline immediate.
Offset += SOffset->getImm();
+ // Get appropriate operand, and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+ if (!MOp)
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdata);
+ Width = getOperandSizeInBytes(LdSt, MOp);
return true;
}
BaseOps.push_back(BaseOp);
OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset);
Offset = OffsetOp ? OffsetOp->getImm() : 0;
+ // Get appropriate operand, and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::sdst);
+ Width = getOperandSizeInBytes(LdSt, MOp);
return true;
}
if (BaseOp)
BaseOps.push_back(BaseOp);
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
+ // Get appropriate operand, and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+ if (!MOp)
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdata);
+ Width = getOperandSizeInBytes(LdSt, MOp);
return true;
}
bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads) const {
+ unsigned NumLoads,
+ unsigned NumBytes) const {
assert(!BaseOps1.empty() && !BaseOps2.empty());
const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
const MachineInstr &MIb) const {
SmallVector<const MachineOperand *, 4> BaseOps0, BaseOps1;
int64_t Offset0, Offset1;
+ unsigned Dummy0, Dummy1;
bool Offset0IsScalable, Offset1IsScalable;
- if (!getMemOperandsWithOffset(MIa, BaseOps0, Offset0, Offset0IsScalable, &RI) ||
- !getMemOperandsWithOffset(MIb, BaseOps1, Offset1, Offset1IsScalable, &RI))
+ if (!getMemOperandsWithOffsetWidth(MIa, BaseOps0, Offset0, Offset0IsScalable,
+ Dummy0, &RI) ||
+ !getMemOperandsWithOffsetWidth(MIb, BaseOps1, Offset1, Offset1IsScalable,
+ Dummy1, &RI))
return false;
if (!memOpsHaveSameBaseOperands(BaseOps0, BaseOps1))
int64_t &Offset1,
int64_t &Offset2) const override;
- bool
- getMemOperandsWithOffset(const MachineInstr &LdSt,
- SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable,
- const TargetRegisterInfo *TRI) const final;
+ unsigned getOperandSizeInBytes(const MachineInstr &LdSt,
+ const MachineOperand *MOp) const;
+
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const final;
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads) const override;
+ unsigned NumLoads, unsigned NumBytes) const override;
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
int64_t Offset1, unsigned NumLoads) const override;
}
/// Get the base register and byte offset of a load/store instr.
-bool HexagonInstrInfo::getMemOperandsWithOffset(
+bool HexagonInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
- unsigned AccessSize = 0;
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
OffsetIsScalable = false;
- const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
+ const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, Width);
if (!BaseOp || !BaseOp->isReg())
return false;
BaseOps.push_back(BaseOp);
bool expandPostRAPseudo(MachineInstr &MI) const override;
/// Get the base register and byte offset of a load/store instr.
- bool
- getMemOperandsWithOffset(const MachineInstr &LdSt,
- SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
/// Reverses the branch condition of the specified condition list,
/// returning false on success and true if it cannot be reversed.
return true;
}
-bool LanaiInstrInfo::getMemOperandsWithOffset(
+bool LanaiInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
switch (LdSt.getOpcode()) {
default:
return false;
case Lanai::LDBs_RI:
case Lanai::LDBz_RI:
const MachineOperand *BaseOp;
- unsigned Width;
OffsetIsScalable = false;
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
return false;
bool expandPostRAPseudo(MachineInstr &MI) const override;
- bool
- getMemOperandsWithOffset(const MachineInstr &LdSt,
- SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
}
}
-bool X86InstrInfo::getMemOperandsWithOffset(
+bool X86InstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
const MCInstrDesc &Desc = MemOp.getDesc();
int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
if (MemRefBegin < 0)
return false;
OffsetIsScalable = false;
+ // FIXME: Relying on memoperands() may not be the right thing to do here.
+ // Check with the X86 maintainers and fix it accordingly. For now, it is OK,
+ // since `Width` is not used by the X86 back-end at the moment.
+ Width =
+ !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;
BaseOps.push_back(BaseOp);
return true;
}
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- bool
- getMemOperandsWithOffset(const MachineInstr &LdSt,
- SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
bool analyzeBranchPredicate(MachineBasicBlock &MBB,
TargetInstrInfo::MachineBranchPredicate &MBP,
bool AllowModify = false) const override;