return LoopBB;
}
+static unsigned getMOVRELDPseudo(const TargetRegisterClass *VecRC) {
+ switch (VecRC->getSize()) {
+ case 4:
+ return AMDGPU::V_MOVRELD_B32_V1;
+ case 8:
+ return AMDGPU::V_MOVRELD_B32_V2;
+ case 16:
+ return AMDGPU::V_MOVRELD_B32_V4;
+ case 32:
+ return AMDGPU::V_MOVRELD_B32_V8;
+ case 64:
+ return AMDGPU::V_MOVRELD_B32_V16;
+ default:
+ llvm_unreachable("unsupported size for MOVRELD pseudos");
+ }
+}
+
static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
MachineBasicBlock &MBB,
const SISubtarget &ST) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
} else {
- const MCInstrDesc &MovRelDesc = TII->get(AMDGPU::V_MOVRELD_B32_e32);
+ const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
- MachineInstr *MovRel =
- BuildMI(MBB, I, DL, MovRelDesc)
- .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
- .addOperand(*Val)
- .addReg(Dst, RegState::ImplicitDefine)
- .addReg(SrcVec->getReg(), RegState::Implicit);
-
- const int ImpDefIdx = MovRelDesc.getNumOperands() +
- MovRelDesc.getNumImplicitUses();
- const int ImpUseIdx = ImpDefIdx + 1;
-
- MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+ BuildMI(MBB, I, DL, MovRelDesc)
+ .addReg(Dst, RegState::Define)
+ .addReg(SrcVec->getReg())
+ .addOperand(*Val)
+ .addImm(SubReg - AMDGPU::sub0);
}
MI.eraseFromParent();
.addReg(PhiReg, RegState::Implicit)
.addReg(AMDGPU::M0, RegState::Implicit);
} else {
- const MCInstrDesc &MovRelDesc = TII->get(AMDGPU::V_MOVRELD_B32_e32);
- // vdst is not actually read and just provides the base register index.
- MachineInstr *MovRel =
- BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
- .addReg(PhiReg, RegState::Undef, SubReg) // vdst
- .addOperand(*Val)
- .addReg(Dst, RegState::ImplicitDefine)
- .addReg(PhiReg, RegState::Implicit);
-
- const int ImpDefIdx = MovRelDesc.getNumOperands() +
- MovRelDesc.getNumImplicitUses();
- const int ImpUseIdx = ImpDefIdx + 1;
-
- MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+ const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
+
+ BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
+ .addReg(Dst, RegState::Define)
+ .addReg(PhiReg)
+ .addOperand(*Val)
+ .addImm(SubReg - AMDGPU::sub0);
}
MI.eraseFromParent();
MI.eraseFromParent();
break;
}
+ case AMDGPU::V_MOVRELD_B32_V1:
+ case AMDGPU::V_MOVRELD_B32_V2:
+ case AMDGPU::V_MOVRELD_B32_V4:
+ case AMDGPU::V_MOVRELD_B32_V8:
+ case AMDGPU::V_MOVRELD_B32_V16: {
+ const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
+ unsigned VecReg = MI.getOperand(0).getReg();
+ bool IsUndef = MI.getOperand(1).isUndef();
+ unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
+ assert(VecReg == MI.getOperand(1).getReg());
+
+ MachineInstr *MovRel =
+ BuildMI(MBB, MI, DL, MovRelDesc)
+ .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
+ .addOperand(MI.getOperand(2))
+ .addReg(VecReg, RegState::ImplicitDefine)
+ .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
+
+ const int ImpDefIdx =
+ MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
+ const int ImpUseIdx = ImpDefIdx + 1;
+ MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+
+ MI.eraseFromParent();
+ break;
+ }
case AMDGPU::SI_PC_ADD_REL_OFFSET: {
MachineFunction &MF = *MBB.getParent();
unsigned Reg = MI.getOperand(0).getReg();
let VOP1 = 1;
}
+// This is a pseudo variant of the v_movreld_b32 instruction in which the
+// vector operand appears only twice, once as def and once as use. Using this
+// pseudo avoids problems with the Two Address instructions pass.
+class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
+ (outs rc:$vdst),
+ (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
+ let VOP1 = 1;
+
+ let Constraints = "$vsrc = $vdst";
+ let Uses = [M0, EXEC];
+
+ let SubtargetPredicate = HasMovrel;
+}
+
+def V_MOVRELD_B32_V1 : V_MOVRELD_B32_pseudo<VGPR_32>;
+def V_MOVRELD_B32_V2 : V_MOVRELD_B32_pseudo<VReg_64>;
+def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
+def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
+def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
+
let Predicates = [isVI] in {
def : Pat <
--- /dev/null
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}main:
+; GCN: v_movreld_b32_e32 v0,
+; GCN: v_mov_b32_e32 v0, v1
+; GCN: ; return
+define amdgpu_ps float @main(i32 inreg %arg) #0 {
+main_body:
+ %tmp24 = insertelement <2 x float> undef, float 0.000000e+00, i32 %arg
+ %tmp25 = extractelement <2 x float> %tmp24, i32 1
+ ret float %tmp25
+}
+
+attributes #0 = { "InitialPSInputAddr"="36983" }