((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
+ // All DPP instructions with at least one source operand have a fake "old"
+ // source at the beginning that's tied to the dst operand. Handle it here.
+ if (Desc.getNumOperands() >= 2)
+ Inst.addOperand(Inst.getOperand(0));
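+ // Note: Inst.getOperand(0) is the dst register added above; re-adding it
+ // as the leading source mirrors the $old operand that now heads each
+ // InsDPP dag and is tied back to $vdst by the instruction's constraints.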
+
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
-
- // special case v_mac_{f16, f32}:
- // it has src2 register operand that is tied to dst operand
- if (Inst.getOpcode() == AMDGPU::V_MAC_F32_dpp ||
- Inst.getOpcode() == AMDGPU::V_MAC_F16_dpp) {
- auto it = Inst.begin();
- std::advance(
- it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
- Inst.insert(it, Inst.getOperand(0)); // src2 = dst
- }
}
//===----------------------------------------------------------------------===//
);
}
-class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
- bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
+class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
+ int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod> {
dag ret = !if (!eq(NumSrcArgs, 0),
               // VOP1 without input operands (V_NOP)
               (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl),
           !if (!eq(NumSrcArgs, 1),
!if (!eq(HasModifiers, 1),
// VOP1_DPP with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ (ins DstRC:$old, Src0Mod:$src0_modifiers,
+ Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
/* else */,
// VOP1_DPP without modifiers
- (ins Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
+ (ins DstRC:$old, Src0RC:$src0,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
/* endif */)
/* NumSrcArgs == 2 */,
!if (!eq(HasModifiers, 1),
// VOP2_DPP with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ (ins DstRC:$old,
+ Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
/* else */,
// VOP2_DPP without modifiers
- (ins Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl,
- row_mask:$row_mask, bank_mask:$bank_mask,
- bound_ctrl:$bound_ctrl)
+ (ins DstRC:$old,
+ Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl,
+ row_mask:$row_mask, bank_mask:$bank_mask,
+ bound_ctrl:$bound_ctrl)
/* endif */)));
}
getOpSelMod<Src0VT>.ret,
getOpSelMod<Src1VT>.ret,
getOpSelMod<Src2VT>.ret>.ret;
- field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
+ field dag InsDPP = getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
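+  // DstRCDPP supplies the register class for the tied $old operand that
+  // getInsDPP now places at the front of the dag.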
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
let Outs = (outs);
let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
- let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
let Uses = ps.Uses;
let SchedRW = ps.SchedRW;
let hasSideEffects = ps.hasSideEffects;
- let Constraints = ps.Constraints;
- let DisableEncoding = ps.DisableEncoding;
bits<8> vdst;
let Inst{8-0} = 0xfa; // dpp
def : Pat <
(i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
imm:$bound_ctrl)),
- (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
- (as_i32imm $bank_mask), (as_i1imm $bound_ctrl))
+ (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
+ (as_i32imm $row_mask), (as_i32imm $bank_mask),
+ (as_i1imm $bound_ctrl))
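+   // $src is passed twice: once as the tied "old" value and once as the
+   // actual DPP source.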
>;
-
def : Pat<
(i32 (anyext i16:$src)),
(COPY $src)
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
- let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ let InsDPP = (ins DstRCDPP:$old,
+ Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
- VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
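+  // The stub $src2 is gone: v_mac's tied dst value now travels through the
+  // generic $old operand instead of the asm-parser special case.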
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
- let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0,
+ let InsDPP = (ins DstRCDPP:$old,
+ Src0Mod:$src0_modifiers, Src0DPP:$src0,
Src1Mod:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let Uses = ps.Uses;
let SchedRW = ps.SchedRW;
let hasSideEffects = ps.hasSideEffects;
- let Constraints = ps.Constraints;
- let DisableEncoding = ps.DisableEncoding;
bits<8> vdst;
bits<8> src1;
let AssemblerPredicate = !if(P.HasExt, HasDPP, DisableInst);
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
+ let Constraints = !if(P.NumSrcArgs, "$old = $vdst", "");
+ let DisableEncoding = !if(P.NumSrcArgs, "$old", "");
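+  // V_NOP (NumSrcArgs == 0) carries no $old operand, so it gets neither a
+  // tie constraint nor a DisableEncoding entry.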
let DecoderNamespace = "DPP";
}
body: |
bb.0:
%vgpr0 = V_MOV_B32_e32 0, implicit %exec
- %vgpr1 = V_MOV_B32_dpp %vgpr0, 0, 15, 15, 0, implicit %exec
+ %vgpr1 = V_MOV_B32_dpp %vgpr1, %vgpr0, 0, 15, 15, 0, implicit %exec
S_BRANCH %bb.1
bb.1:
implicit %exec, implicit %vcc = V_CMPX_EQ_I32_e32 %vgpr0, %vgpr1, implicit %exec
- %vgpr3 = V_MOV_B32_dpp %vgpr0, 0, 15, 15, 0, implicit %exec
+ %vgpr3 = V_MOV_B32_dpp %vgpr3, %vgpr0, 0, 15, 15, 0, implicit %exec
S_ENDPGM
...
---
; VI-LABEL: {{^}}dpp_test:
; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
+; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}}
; VI: s_nop 1
-; VI: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
+; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
+; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
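+; At -O0 (the run the VI-NOOPT prefix is assumed to cover) the tied old
+; value stays in a separate register, so the src0 byte of the encoding
+; differs between the two runs.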
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0
store i32 %tmp0, i32 addrspace(1)* %out
  ret void
}
; VI-LABEL: {{^}}dpp_wait_states:
+; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}}
; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
; VI: s_nop 1
-; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; VI: s_nop 1
-; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0
%tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) #0