MachineInstr *MixInst =
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpC), Dst)
.addImm(Src0Mods)
- .addReg(Src0)
+ .addReg(copyToVGPRIfSrcFolded(Src0, Src0Mods, I.getOperand(1), &I))
.addImm(Src1Mods)
- .addReg(Src1)
+ .addReg(copyToVGPRIfSrcFolded(Src1, Src1Mods, I.getOperand(2), &I))
.addImm(Src2Mods)
- .addReg(Src2)
+ .addReg(copyToVGPRIfSrcFolded(Src2, Src2Mods, I.getOperand(3), &I))
.addImm(0)
.addImm(0)
.addImm(0);
}
std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
- MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR) const {
+ MachineOperand &Root, bool AllowAbs, bool OpSel) const {
Register Src = Root.getReg();
- Register OrigSrc = Src;
unsigned Mods = 0;
MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);
if (OpSel)
Mods |= SISrcMods::OP_SEL_0;
+ return std::make_pair(Src, Mods);
+}
+
+/// Return the register to use for a source operand: \p Src itself, or a fresh
+/// copy of it when a copy is required.
+///
+/// A copy is made only when (\p Mods is non-zero or \p ForceVGPR is set) and
+/// \p Src is not assigned to the VGPR register bank. In that case a new
+/// virtual register is cloned from \p Root's register, a COPY from \p Src into
+/// it is built at \p InsertPt (in InsertPt's parent block, using InsertPt's
+/// debug location), and the new register is returned. Otherwise \p Src is
+/// returned unchanged and no instruction is created.
+Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
+ Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
+ bool ForceVGPR) const {
if ((Mods != 0 || ForceVGPR) &&
RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
- MachineInstr *UseMI = Root.getParent();
// If we looked through copies to find source modifiers on an SGPR operand,
// we now have an SGPR register source. To avoid potentially violating the
// constant bus restriction, we need to insert a copy to a VGPR.
- Register VGPRSrc = MRI->cloneVirtualRegister(OrigSrc);
- BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
+ Register VGPRSrc = MRI->cloneVirtualRegister(Root.getReg());
+ BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
TII.get(AMDGPU::COPY), VGPRSrc)
- .addReg(Src);
+ .addReg(Src);
Src = VGPRSrc;
}
- return std::make_pair(Src, Mods);
+ return Src;
}
///
std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+ },
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+ },
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+ },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
}};
}
std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+ },
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
}};
}
return None;
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+ },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
}};
}
unsigned Mods;
std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
/* AllowAbs */ false,
- /* OpSel */ false,
- /* ForceVGPR */ true);
+ /* OpSel */ false);
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addReg(
+ copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
+ },
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
}};
}
unsigned Mods;
std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
/* AllowAbs */ false,
- /* OpSel */ true,
- /* ForceVGPR */ true);
+ /* OpSel */ true);
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addReg(
+ copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
+ },
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
}};
}
ret float %fma
}
+; Takes the fabs of the inreg argument %x and feeds it to the first operand of
+; llvm.fma.f32. Every check prefix below expects a single v_fma_f32 that uses
+; the |s0| source modifier.
+; NOTE(review): the name suggests this guards against a crash following a
+; failed fma_mix selection attempt; confirm against the commit message.
+define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x, float %y, float %z) {
+; GFX6-LABEL: dont_crash_after_fma_mix_select_attempt:
+; GFX6: ; %bb.0: ; %.entry
+; GFX6-NEXT: v_fma_f32 v0, |s0|, v0, v1
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: dont_crash_after_fma_mix_select_attempt:
+; GFX8: ; %bb.0: ; %.entry
+; GFX8-NEXT: v_fma_f32 v0, |s0|, v0, v1
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX9-LABEL: dont_crash_after_fma_mix_select_attempt:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: v_fma_f32 v0, |s0|, v0, v1
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: dont_crash_after_fma_mix_select_attempt:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: v_fma_f32 v0, |s0|, v0, v1
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: dont_crash_after_fma_mix_select_attempt:
+; GFX11: ; %bb.0: ; %.entry
+; GFX11-NEXT: v_fma_f32 v0, |s0|, v0, v1
+; GFX11-NEXT: ; return to shader part epilog
+.entry:
+ %fabs.x = call contract float @llvm.fabs.f32(float %x)
+ %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
+ ret float %fma
+}
+
declare half @llvm.fma.f16(half, half, half) #0
declare float @llvm.fma.f32(float, float, float) #0
declare double @llvm.fma.f64(double, double, double) #0