    .addReg(SrcReg, getKillRegState(KillSrc));
}
+/// Handle copying from SGPR to AGPR, or from AGPR to AGPR. A direct copy is
+/// not possible, so an intermediate VGPR needs to be used.
+static void indirectCopyToAGPR(const SIInstrInfo &TII,
+                               MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MI,
+                               const DebugLoc &DL, MCRegister DestReg,
+                               MCRegister SrcReg, bool KillSrc,
+                               RegScavenger &RS) {
+  const SIRegisterInfo &RI = TII.getRegisterInfo();
+
+  assert(AMDGPU::SReg_32RegClass.contains(SrcReg) ||
+         AMDGPU::AGPR_32RegClass.contains(SrcReg));
+
+  // First try to find defining accvgpr_write to avoid temporary registers.
+  for (auto Def = MI, E = MBB.begin(); Def != E; ) {
+    --Def;
+    if (!Def->definesRegister(SrcReg, &RI))
+      continue;
+    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
+      break;
+
+    MachineOperand &DefOp = Def->getOperand(1);
+    assert(DefOp.isReg() || DefOp.isImm());
+
+    if (DefOp.isReg()) {
+      // Check that the register source operand is not clobbered before MI.
+      // Immediate operands are always safe to propagate.
+      bool SafeToPropagate = true;
+      for (auto I = Def; I != MI && SafeToPropagate; ++I)
+        if (I->modifiesRegister(DefOp.getReg(), &RI))
+          SafeToPropagate = false;
+
+      if (!SafeToPropagate)
+        break;
+
+      DefOp.setIsKill(false);
+    }
+
+    BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
+      .add(DefOp);
+    return;
+  }
+
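+  // Couldn't reuse a defining accvgpr_write; fall back to copying through a
+  // scavenged VGPR temporary.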
+  RS.enterBasicBlock(MBB);
+  RS.forward(MI);
+
+  // Ideally we want to have three registers for a long reg_sequence copy
+  // to hide 2 waitstates between v_mov_b32 and accvgpr_write.
+  unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
+                                             *MBB.getParent());
+
+  // Registers in the sequence are allocated contiguously so we can just
+  // use register number to pick one of three round-robin temps.
+  unsigned RegNo = DestReg % 3;
+  Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+  if (!Tmp)
+    report_fatal_error("Cannot scavenge VGPR to copy to AGPR");
+  RS.setRegUsed(Tmp);
+  // Only loop through if there are any free registers left; otherwise the
+  // scavenger may report a fatal error when there is no emergency spill slot,
+  // or insert a spill using the slot.
+  while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {
+    Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+    if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
+      break;
+    Tmp = Tmp2;
+    RS.setRegUsed(Tmp);
+  }
+
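+  // Copy the source into the temporary VGPR, then write the VGPR into the
+  // destination AGPR.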
+  TII.copyPhysReg(MBB, MI, DL, Tmp, SrcReg, KillSrc);
+  BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
+    .addReg(Tmp, RegState::Kill);
+}
+
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, MCRegister DestReg,
    return;
  }
-  if (RC == &AMDGPU::AGPR_32RegClass) {
-    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
-           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
-           AMDGPU::AGPR_32RegClass.contains(SrcReg));
-    if (!AMDGPU::VGPR_32RegClass.contains(SrcReg)) {
-      // First try to find defining accvgpr_write to avoid temporary registers.
-      for (auto Def = MI, E = MBB.begin(); Def != E; ) {
-        --Def;
-        if (!Def->definesRegister(SrcReg, &RI))
-          continue;
-        if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
-          break;
-
-        MachineOperand &DefOp = Def->getOperand(1);
-        assert(DefOp.isReg() || DefOp.isImm());
-
-        if (DefOp.isReg()) {
-          // Check that register source operand if not clobbered before MI.
-          // Immediate operands are always safe to propagate.
-          bool SafeToPropagate = true;
-          for (auto I = Def; I != MI && SafeToPropagate; ++I)
-            if (I->modifiesRegister(DefOp.getReg(), &RI))
-              SafeToPropagate = false;
-
-          if (!SafeToPropagate)
-            break;
-          DefOp.setIsKill(false);
-        }
-
-        BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
-          .add(DefOp);
-        return;
-      }
-
-      RegScavenger RS;
-      RS.enterBasicBlock(MBB);
-      RS.forward(MI);
-
-      // Ideally we want to have three registers for a long reg_sequence copy
-      // to hide 2 waitstates between v_mov_b32 and accvgpr_write.
-      unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
-                                                 *MBB.getParent());
-
-      // Registers in the sequence are allocated contiguously so we can just
-      // use register number to pick one of three round-robin temps.
-      unsigned RegNo = DestReg % 3;
-      Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
-      if (!Tmp)
-        report_fatal_error("Cannot scavenge VGPR to copy to AGPR");
-      RS.setRegUsed(Tmp);
-      // Only loop through if there are any free registers left, otherwise
-      // scavenger may report a fatal error without emergency spill slot
-      // or spill with the slot.
-      while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {
-        unsigned Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
-        if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
-          break;
-        Tmp = Tmp2;
-        RS.setRegUsed(Tmp);
-      }
-      copyPhysReg(MBB, MI, DL, Tmp, SrcReg, KillSrc);
+  if (RC == &AMDGPU::AGPR_32RegClass) {
+    if (AMDGPU::VGPR_32RegClass.contains(SrcReg)) {
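+      // A VGPR source can be written to the AGPR directly.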
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
-        .addReg(Tmp, RegState::Kill);
+        .addReg(SrcReg, getKillRegState(KillSrc));
      return;
    }
-    BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
+    // FIXME: The pass should maintain the scavenger to avoid scanning through
+    // the block on every AGPR spill.
+    RegScavenger RS;
+    indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS);
    return;
  }