for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
MachineOperand &Op = Inst.getOperand(i);
if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
+ // Only propagate to SCC users through a live (non-dead) def of SCC.
+ if (Op.isDef() && !Op.isDead())
+ addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
Inst.RemoveOperand(i);
- addSCCDefUsersToVALUWorklist(Inst, Worklist);
}
}
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::addSCCDefUsersToVALUWorklist(
- MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
+void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
+ MachineInstr &SCCDefInst,
+ SetVectorType &Worklist) const {
+ // Op must be a live (non-dead) def of SCC that belongs to SCCDefInst.
+ assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
+ !Op.isDead() && Op.getParent() == &SCCDefInst);
// This assumes that all the users of SCC are in the same block
// as the SCC def.
- for (MachineInstr &MI :
- make_range(MachineBasicBlock::iterator(SCCDefInst),
- SCCDefInst.getParent()->end())) {
+ for (MachineInstr &MI : // Start at the instruction after SCCDefInst.
+ make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)),
+ SCCDefInst.getParent()->end())) {
+ // Queue SCC users before the def check; MI may both use and redefine SCC.
+ if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
+ Worklist.insert(&MI);
// Exit if we find another SCC def.
if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
return;
-
- if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
- Worklist.insert(&MI);
}
}
void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
SetVectorType &Worklist) const;
- void
- addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
- SetVectorType &Worklist) const;
+ void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
+ MachineInstr &SCCDefInst,
+ SetVectorType &Worklist) const;
const TargetRegisterClass *
getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
--- /dev/null
+# RUN: llc -march=amdgcn -run-pass=si-fix-sgpr-copies -o - %s | FileCheck --check-prefix=GCN %s
+
+# GCN-LABEL: name: fix-sgpr-copies
+# GCN: V_ADD_I32_e32
+# GCN: V_ADDC_U32_e32
+---
+name: fix-sgpr-copies
+body: |
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:sreg_32 = IMPLICIT_DEF
+ %3:sreg_32 = IMPLICIT_DEF
+ %4:vgpr_32 = V_CVT_U32_F32_e64 0, %0:vgpr_32, 0, 0, implicit $exec
+ %5:sreg_32 = COPY %4:vgpr_32
+ %6:sreg_32 = S_ADD_I32 %2:sreg_32, %5:sreg_32, implicit-def $scc
+ %7:sreg_32 = S_ADDC_U32 %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $scc
+...
store i64 %result, i64 addrspace(1)* %out
ret void
}
+
+;FUNC-LABEL: {{^}}test_udiv_k:
+;GCN: v_mul{{.+}} v{{[0-9]+}}, v{{[0-9]+}}, 24
+;GCN: v_mul{{.+}} v{{[0-9]+}}, v{{[0-9]+}}, 24
+;GCN: v_mul{{.+}} v{{[0-9]+}}, v{{[0-9]+}}, 24
+;GCN: v_add
+;GCN: v_addc
+;GCN: v_addc
+;GCN: s_endpgm
+define amdgpu_kernel void @test_udiv_k(i64 addrspace(1)* %out, i64 %x) {
+ %result = udiv i64 24, %x
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}