MachineFunction *Func = MBB->getParent();
MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
  Func->push_back(NewMBB); // Insert into the function.
- for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
- It != E; ++It) {
- MachineInstr *MI = Func->CloneMachineInstr(It);
- NewMBB->push_back(MI);
- }
+ for (const MachineInstr &It : *MBB)
+ NewMBB->push_back(Func->CloneMachineInstr(&It));
return NewMBB;
}
while (It != E) {
if (Pre->getOpcode() == AMDGPU::CONTINUE
&& It->getOpcode() == AMDGPU::ENDLOOP)
- ContInstr.push_back(Pre);
+ ContInstr.push_back(&*Pre);
Pre = It;
++It;
}
namespace {
-static bool isCFAlu(const MachineInstr *MI) {
- switch (MI->getOpcode()) {
+static bool isCFAlu(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
case AMDGPU::CF_ALU:
case AMDGPU::CF_ALU_PUSH_BEFORE:
return true;
static char ID;
const R600InstrInfo *TII;
- unsigned getCFAluSize(const MachineInstr *MI) const;
- bool isCFAluEnabled(const MachineInstr *MI) const;
+ unsigned getCFAluSize(const MachineInstr &MI) const;
+ bool isCFAluEnabled(const MachineInstr &MI) const;
  /// The IfCvt pass can generate "disabled" ALU clause markers that need to be
  /// removed and their contents merged into the previous ALU clause.
  /// This function parses instructions after CFAlu until it finds a disabled
  /// CFAlu and merges its contents, or until it finds an enabled CFAlu.
- void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
+ void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
  /// Check whether LatrCFAlu can be merged into RootCFAlu, and do so if
  /// possible.
- bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
- const;
+ bool mergeIfPossible(MachineInstr &RootCFAlu,
+ const MachineInstr &LatrCFAlu) const;
public:
R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
char R600ClauseMergePass::ID = 0;
-unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
+unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
assert(isCFAlu(MI));
- return MI->getOperand(
- TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
+ return MI
+ .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
+ .getImm();
}
-bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
+bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
assert(isCFAlu(MI));
- return MI->getOperand(
- TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
+ return MI
+ .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
+ .getImm();
}
-void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
- const {
+void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
+ MachineInstr &CFAlu) const {
int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
- MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
+ MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
I++;
do {
- while (I!= E && !isCFAlu(I))
+ while (I != E && !isCFAlu(*I))
I++;
if (I == E)
return;
- MachineInstr *MI = I++;
+ MachineInstr &MI = *I++;
if (isCFAluEnabled(MI))
break;
- CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
- MI->eraseFromParent();
+ CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
+ MI.eraseFromParent();
} while (I != E);
}
-bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
- const MachineInstr *LatrCFAlu) const {
+bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
+ const MachineInstr &LatrCFAlu) const {
assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
unsigned RootInstCount = getCFAluSize(RootCFAlu),
DEBUG(dbgs() << "Excess inst counts\n");
return false;
}
- if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+ if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
return false;
  // Is KCache Bank 0 compatible?
int Mode0Idx =
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
int KBank0LineIdx =
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
- if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
- RootCFAlu->getOperand(Mode0Idx).getImm() &&
- (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
- RootCFAlu->getOperand(KBank0Idx).getImm() ||
- LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
- RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
+ if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
+ RootCFAlu.getOperand(Mode0Idx).getImm() &&
+ (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
+ RootCFAlu.getOperand(KBank0Idx).getImm() ||
+ LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
+ RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
DEBUG(dbgs() << "Wrong KC0\n");
return false;
}
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
int KBank1LineIdx =
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
- if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
- RootCFAlu->getOperand(Mode1Idx).getImm() &&
- (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
- RootCFAlu->getOperand(KBank1Idx).getImm() ||
- LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
- RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
+ if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
+ RootCFAlu.getOperand(Mode1Idx).getImm() &&
+ (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
+ RootCFAlu.getOperand(KBank1Idx).getImm() ||
+ LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
+ RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
DEBUG(dbgs() << "Wrong KC0\n");
return false;
}
- if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
- RootCFAlu->getOperand(Mode0Idx).setImm(
- LatrCFAlu->getOperand(Mode0Idx).getImm());
- RootCFAlu->getOperand(KBank0Idx).setImm(
- LatrCFAlu->getOperand(KBank0Idx).getImm());
- RootCFAlu->getOperand(KBank0LineIdx).setImm(
- LatrCFAlu->getOperand(KBank0LineIdx).getImm());
+ if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
+ RootCFAlu.getOperand(Mode0Idx).setImm(
+ LatrCFAlu.getOperand(Mode0Idx).getImm());
+ RootCFAlu.getOperand(KBank0Idx).setImm(
+ LatrCFAlu.getOperand(KBank0Idx).getImm());
+ RootCFAlu.getOperand(KBank0LineIdx)
+ .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
}
- if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
- RootCFAlu->getOperand(Mode1Idx).setImm(
- LatrCFAlu->getOperand(Mode1Idx).getImm());
- RootCFAlu->getOperand(KBank1Idx).setImm(
- LatrCFAlu->getOperand(KBank1Idx).getImm());
- RootCFAlu->getOperand(KBank1LineIdx).setImm(
- LatrCFAlu->getOperand(KBank1LineIdx).getImm());
+ if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
+ RootCFAlu.getOperand(Mode1Idx).setImm(
+ LatrCFAlu.getOperand(Mode1Idx).getImm());
+ RootCFAlu.getOperand(KBank1Idx).setImm(
+ LatrCFAlu.getOperand(KBank1Idx).getImm());
+ RootCFAlu.getOperand(KBank1LineIdx)
+ .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
}
- RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
- RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
+ RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
+ RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
return true;
}
MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator LatestCFAlu = E;
while (I != E) {
- MachineInstr *MI = I++;
- if ((!TII->canBeConsideredALU(*MI) && !isCFAlu(MI)) ||
- TII->mustBeLastInClause(MI->getOpcode()))
+ MachineInstr &MI = *I++;
+ if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
+ TII->mustBeLastInClause(MI.getOpcode()))
LatestCFAlu = E;
if (!isCFAlu(MI))
continue;
cleanPotentialDisabledCFAlu(MI);
- if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
- MI->eraseFromParent();
+ if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
+ MI.eraseFromParent();
} else {
- assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
+ assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
LatestCFAlu = MI;
}
}
unsigned MaxFetchInst;
const R600Subtarget *ST;
- bool IsTrivialInst(MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+ bool IsTrivialInst(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case AMDGPU::KILL:
case AMDGPU::RETURN:
return true;
return TII->get(Opcode);
}
- bool isCompatibleWithClause(const MachineInstr *MI,
- std::set<unsigned> &DstRegs) const {
+ bool isCompatibleWithClause(const MachineInstr &MI,
+ std::set<unsigned> &DstRegs) const {
unsigned DstMI, SrcMI;
- for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
- E = MI->operands_end(); I != E; ++I) {
+ for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
+ E = MI.operands_end();
+ I != E; ++I) {
const MachineOperand &MO = *I;
if (!MO.isReg())
continue;
bool IsTex = TII->usesTextureCache(*ClauseHead);
std::set<unsigned> DstRegs;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
- if (IsTrivialInst(I))
+ if (IsTrivialInst(*I))
continue;
if (AluInstCount >= MaxFetchInst)
break;
if ((IsTex && !TII->usesTextureCache(*I)) ||
(!IsTex && !TII->usesVertexCache(*I)))
break;
- if (!isCompatibleWithClause(I, DstRegs))
+ if (!isCompatibleWithClause(*I, DstRegs))
break;
AluInstCount ++;
- ClauseContent.push_back(I);
+ ClauseContent.push_back(&*I);
}
MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
getHWInstrDesc(IsTex?CF_TC:CF_VC))
return ClauseFile(MIb, std::move(ClauseContent));
}
- void getLiteral(MachineInstr *MI, std::vector<MachineOperand *> &Lits) const {
+ void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
static const unsigned LiteralRegs[] = {
AMDGPU::ALU_LITERAL_X,
AMDGPU::ALU_LITERAL_Y,
AMDGPU::ALU_LITERAL_W
};
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
- TII->getSrcs(*MI);
+ TII->getSrcs(MI);
for (const auto &Src:Srcs) {
if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X)
continue;
{ return val->isImm() && (val->getImm() == Imm);});
// Get corresponding Operand
- MachineOperand &Operand = MI->getOperand(
- TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal));
+ MachineOperand &Operand = MI.getOperand(
+ TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
if (It != Lits.end()) {
// Reuse existing literal reg
ClauseFile
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
const {
- MachineBasicBlock::iterator ClauseHead = I;
+ MachineInstr &ClauseHead = *I;
std::vector<MachineInstr *> ClauseContent;
I++;
for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
- if (IsTrivialInst(I)) {
+ if (IsTrivialInst(*I)) {
++I;
continue;
}
break;
std::vector<MachineOperand *>Literals;
if (I->isBundle()) {
- MachineInstr *DeleteMI = I;
+ MachineInstr &DeleteMI = *I;
MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
while (++BI != E && BI->isBundledWithPred()) {
BI->unbundleFromPred();
if (MO.isReg() && MO.isInternalRead())
MO.setIsInternalRead(false);
}
- getLiteral(&*BI, Literals);
+ getLiteral(*BI, Literals);
ClauseContent.push_back(&*BI);
}
I = BI;
- DeleteMI->eraseFromParent();
+ DeleteMI.eraseFromParent();
} else {
- getLiteral(I, Literals);
- ClauseContent.push_back(I);
+ getLiteral(*I, Literals);
+ ClauseContent.push_back(&*I);
I++;
}
for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
}
}
assert(ClauseContent.size() < 128 && "ALU clause is too big");
- ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
- return ClauseFile(ClauseHead, std::move(ClauseContent));
+ ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
+ return ClauseFile(&ClauseHead, std::move(ClauseContent));
}
void
EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
- CounterPropagateAddr(Clause.first, CfCount);
+ CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
.addImm(CfCount);
EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
Clause.first->getOperand(0).setImm(0);
- CounterPropagateAddr(Clause.first, CfCount);
+ CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
.addImm(CfCount);
CfCount += Clause.second.size();
}
- void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
- MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
+ void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
+ MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
}
void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
unsigned Addr) const {
for (MachineInstr *MI : MIs) {
- CounterPropagateAddr(MI, Addr);
+ CounterPropagateAddr(*MI, Addr);
}
}
if (MI->getOpcode() != AMDGPU::ENDIF)
LastAlu.back() = nullptr;
if (MI->getOpcode() == AMDGPU::CF_ALU)
- LastAlu.back() = MI;
+ LastAlu.back() = &*MI;
I++;
bool RequiresWorkAround =
CFStack.requiresWorkAroundForInst(MI->getOpcode());
case AMDGPU::ELSE: {
MachineInstr * JumpInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
- CounterPropagateAddr(JumpInst, CfCount);
+ CounterPropagateAddr(*JumpInst, CfCount);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_ELSE))
.addImm(0)
MachineInstr *IfOrElseInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
- CounterPropagateAddr(IfOrElseInst, CfCount);
+ CounterPropagateAddr(*IfOrElseInst, CfCount);
IfOrElseInst->getOperand(1).setImm(1);
LastAlu.pop_back();
MI->eraseFromParent();
const R600InstrInfo *TII;
int Address;
- unsigned OccupiedDwords(MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+ unsigned OccupiedDwords(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
// These will be expanded to two ALU instructions in the
// ExpandSpecialInstructions pass.
- if (TII->isLDSRetInstr(MI->getOpcode()))
+ if (TII->isLDSRetInstr(MI.getOpcode()))
return 2;
- if(TII->isVector(*MI) ||
- TII->isCubeOp(MI->getOpcode()) ||
- TII->isReductionOp(MI->getOpcode()))
+ if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
+ TII->isReductionOp(MI.getOpcode()))
return 4;
unsigned NumLiteral = 0;
- for (MachineInstr::mop_iterator It = MI->operands_begin(),
- E = MI->operands_end(); It != E; ++It) {
+ for (MachineInstr::mop_iterator It = MI.operands_begin(),
+ E = MI.operands_end();
+ It != E; ++It) {
MachineOperand &MO = *It;
if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
++NumLiteral;
return 1 + NumLiteral;
}
- bool isALU(const MachineInstr *MI) const {
- if (TII->isALUInstr(MI->getOpcode()))
+ bool isALU(const MachineInstr &MI) const {
+ if (TII->isALUInstr(MI.getOpcode()))
return true;
- if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()))
+ if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
return true;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
case AMDGPU::PRED_X:
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
}
}
- bool IsTrivialInst(MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+ bool IsTrivialInst(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case AMDGPU::KILL:
case AMDGPU::RETURN:
case AMDGPU::IMPLICIT_DEF:
((((Sel >> 2) - 512) & 4095) >> 5) << 1);
}
- bool SubstituteKCacheBank(MachineInstr *MI,
- std::vector<std::pair<unsigned, unsigned> > &CachedConsts,
- bool UpdateInstr = true) const {
+ bool
+ SubstituteKCacheBank(MachineInstr &MI,
+ std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
+ bool UpdateInstr = true) const {
std::vector<std::pair<unsigned, unsigned> > UsedKCache;
- if (!TII->isALUInstr(MI->getOpcode()) && MI->getOpcode() != AMDGPU::DOT_4)
+ if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4)
return true;
const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
- TII->getSrcs(*MI);
- assert((TII->isALUInstr(MI->getOpcode()) ||
- MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const");
+ TII->getSrcs(MI);
+ assert(
+ (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == AMDGPU::DOT_4) &&
+ "Can't assign Const");
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
continue;
// in the clause.
unsigned LastUseCount = 0;
for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
- AluInstCount += OccupiedDwords(UseI);
+ AluInstCount += OccupiedDwords(*UseI);
// Make sure we won't need to end the clause due to KCache limitations.
- if (!SubstituteKCacheBank(UseI, KCacheBanks, false))
+ if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
return false;
// We have reached the maximum instruction limit before finding the
bool PushBeforeModifier = false;
unsigned AluInstCount = 0;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
- if (IsTrivialInst(I))
+ if (IsTrivialInst(*I))
continue;
- if (!isALU(I))
+ if (!isALU(*I))
break;
if (AluInstCount > TII->getMaxAlusPerClause())
break;
if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
break;
- if (!SubstituteKCacheBank(I, KCacheBanks))
+ if (!SubstituteKCacheBank(*I, KCacheBanks))
break;
- AluInstCount += OccupiedDwords(I);
+ AluInstCount += OccupiedDwords(*I);
}
unsigned Opcode = PushBeforeModifier ?
AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
if (I->getOpcode() == AMDGPU::CF_ALU)
continue; // BB was already parsed
for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
- if (isALU(I))
+ if (isALU(*I))
I = MakeALUClause(MBB, I);
else
++I;
MachineBasicBlock::iterator I) {
while (I != MBB.begin()) {
--I;
- MachineInstr *MI = I;
- if (isPredicateSetter(MI->getOpcode()))
- return MI;
+ MachineInstr &MI = *I;
+ if (isPredicateSetter(MI.getOpcode()))
+ return &MI;
}
return nullptr;
I->removeFromParent();
I = PriorI;
}
- MachineInstr *LastInst = I;
+ MachineInstr &LastInst = *I;
// If there is only one terminator instruction, process it.
- unsigned LastOpc = LastInst->getOpcode();
+ unsigned LastOpc = LastInst.getOpcode();
if (I == MBB.begin() ||
!isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
if (LastOpc == AMDGPU::JUMP) {
- TBB = LastInst->getOperand(0).getMBB();
+ TBB = LastInst.getOperand(0).getMBB();
return false;
} else if (LastOpc == AMDGPU::JUMP_COND) {
- MachineInstr *predSet = I;
+ auto predSet = I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
}
- TBB = LastInst->getOperand(0).getMBB();
+ TBB = LastInst.getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
}
// Get the instruction before it if it is a terminator.
- MachineInstr *SecondLastInst = I;
- unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ MachineInstr &SecondLastInst = *I;
+ unsigned SecondLastOpc = SecondLastInst.getOpcode();
// If the block ends with a B and a Bcc, handle it.
if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
- MachineInstr *predSet = --I;
+ auto predSet = --I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
}
- TBB = SecondLastInst->getOperand(0).getMBB();
- FBB = LastInst->getOperand(0).getMBB();
+ TBB = SecondLastInst.getOperand(0).getMBB();
+ FBB = LastInst.getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
(void)Tmp;
SrcVec = DstReg;
}
- Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg)
- .addReg(SrcVec);
- DEBUG(dbgs() << " ->"; Pos->dump(););
+ MachineInstr *NewMI =
+ BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec);
+ DEBUG(dbgs() << " ->"; NewMI->dump(););
DEBUG(dbgs() << " Updating Swizzle:\n");
for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
RSI->Instr->eraseFromParent();
// Update RSI
- RSI->Instr = Pos;
+ RSI->Instr = NewMI;
RSI->RegToChan = UpdatedRegToChan;
RSI->UndefReg = UpdatedUndef;
- return Pos;
+ return NewMI;
}
void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
MII != MIIE; ++MII) {
- MachineInstr *MI = MII;
- if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) {
- if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
- unsigned Reg = MI->getOperand(1).getReg();
+ MachineInstr &MI = *MII;
+ if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) {
+ if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
+ unsigned Reg = MI.getOperand(1).getReg();
for (MachineRegisterInfo::def_instr_iterator
It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end();
It != E; ++It) {
continue;
}
-
- RegSeqInfo RSI(*MRI, MI);
+ RegSeqInfo RSI(*MRI, &MI);
    // Are all uses of MI swizzeable?
- unsigned Reg = MI->getOperand(0).getReg();
+ unsigned Reg = MI.getOperand(0).getReg();
if (!areAllUsesSwizzeable(Reg))
continue;
- DEBUG (dbgs() << "Trying to optimize ";
- MI->dump();
- );
+ DEBUG({
+ dbgs() << "Trying to optimize ";
+ MI.dump();
+ });
RegSeqInfo CandidateRSI;
std::vector<std::pair<unsigned, unsigned> > RemapChan;
// Record pre-existing, explicitly requested waits
if (I->getOpcode() == AMDGPU::S_WAITCNT) {
handleExistingWait(*I);
- RemoveMI.push_back(I);
+ RemoveMI.push_back(&*I);
continue;
}
MachineInstr &SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const {
// This assumes that all the users of SCC are in the same block
// as the SCC def.
- for (MachineBasicBlock::iterator I = SCCDefInst,
- E = SCCDefInst.getParent()->end();
- I != E; ++I) {
+ for (MachineInstr &MI :
+ llvm::make_range(MachineBasicBlock::iterator(SCCDefInst),
+ SCCDefInst.getParent()->end())) {
// Exit if we find another SCC def.
- if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
+ if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
return;
- if (I->findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
- Worklist.push_back(I);
+ if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
+ Worklist.push_back(&MI);
}
}
}
// Store a copy of the original live mask when required
- MachineBasicBlock &Entry = MF.front();
- MachineInstr *EntryMI = Entry.getFirstNonPHI();
unsigned LiveMaskReg = 0;
-  if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
-    LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
-    BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg)
-      .addReg(AMDGPU::EXEC);
-  }
-
+  {
+    MachineBasicBlock &Entry = MF.front();
+    MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
+
+    if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
+      LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg)
+        .addReg(AMDGPU::EXEC);
+    }
- if (GlobalFlags == StateWQM) {
- // For a shader that needs only WQM, we can just set it once.
- BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
- AMDGPU::EXEC).addReg(AMDGPU::EXEC);
+ if (GlobalFlags == StateWQM) {
+ // For a shader that needs only WQM, we can just set it once.
+ BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
+ AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
- lowerLiveMaskQueries(LiveMaskReg);
- // EntryMI may become invalid here
- return true;
+ lowerLiveMaskQueries(LiveMaskReg);
+ // EntryMI may become invalid here
+ return true;
+ }
}
lowerLiveMaskQueries(LiveMaskReg);
- EntryMI = nullptr;
// Handle the general case
for (const auto &BII : Blocks)