return Ret;
}
-static MachineInstr *
-buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
- unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
- unsigned VM, bool Compr, unsigned Enabled, bool Done) {
- const DebugLoc &DL = Insert->getDebugLoc();
- MachineBasicBlock &BB = *Insert->getParent();
- unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
- return BuildMI(BB, Insert, DL, TII.get(Opcode))
- .addImm(Tgt)
- .addReg(Reg0)
- .addReg(Reg1)
- .addReg(Reg2)
- .addReg(Reg3)
- .addImm(VM)
- .addImm(Compr)
- .addImm(Enabled);
-}
-
static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
int64_t C;
if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
MachineBasicBlock *BB = I.getParent();
unsigned IntrinsicID = I.getIntrinsicID();
switch (IntrinsicID) {
- case Intrinsic::amdgcn_exp: {
- int64_t Tgt = I.getOperand(1).getImm();
- int64_t Enabled = I.getOperand(2).getImm();
- int64_t Done = I.getOperand(7).getImm();
- int64_t VM = I.getOperand(8).getImm();
-
- MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
- I.getOperand(4).getReg(),
- I.getOperand(5).getReg(),
- I.getOperand(6).getReg(),
- VM, false, Enabled, Done);
-
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
- }
- case Intrinsic::amdgcn_exp_compr: {
- const DebugLoc &DL = I.getDebugLoc();
- int64_t Tgt = I.getOperand(1).getImm();
- int64_t Enabled = I.getOperand(2).getImm();
- Register Reg0 = I.getOperand(3).getReg();
- Register Reg1 = I.getOperand(4).getReg();
- Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- int64_t Done = I.getOperand(5).getImm();
- int64_t VM = I.getOperand(6).getImm();
-
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
- MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
- true, Enabled, Done);
-
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
- }
case Intrinsic::amdgcn_end_cf: {
// FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp),1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 0, 0
; CHECK: EXP_DONE 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 1, 0
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), -1, 0
%5:vgpr(<2 x s16>) = G_BITCAST %0(s32)
; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK: EXP 1, %0, %0, [[UNDEF0]], [[UNDEF0]], 0, 1, 15, implicit $exec
+ ; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK: EXP 1, %0, %0, [[UNDEF1]], [[UNDEF0]], 0, 1, 15, implicit $exec
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 0, 0
- ; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK: EXP_DONE 1, %0, %0, [[UNDEF1]], [[UNDEF1]], 0, 1, 15, implicit $exec
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 1, 0
+ ; CHECK: [[UNDEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK: [[UNDEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK: EXP_DONE 1, %0, %0, [[UNDEF3]], [[UNDEF2]], 0, 1, 15, implicit $exec
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), -1, 0