llvm_unreachable("Unhandled operand type in cvtVOPD");
};
- auto InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
+ const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
// MCInst operands are ordered as follows:
// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
}
for (auto CompIdx : VOPD::COMPONENTS) {
+ // Bind the component once; every query below goes through CInfo.
+ const auto &CInfo = InstInfo[CompIdx];
+ // A tied src2 accumulator has no parsed operand of its own, so
+ // getParsedSrcIndex must be told to redirect it to the parsed dst.
+ const bool CompHasSrc2Acc = CInfo.hasSrc2Acc();
- auto SrcOperandsNum = InstInfo[CompIdx].getSrcOperandsNum();
+ const auto SrcOperandsNum = CInfo.getSrcOperandsNum();
for (unsigned SrcIdx = 0; SrcIdx < SrcOperandsNum; ++SrcIdx) {
- addOp(InstInfo[CompIdx].getParsedSrcIndex(SrcIdx));
+ addOp(CInfo.getParsedSrcIndex(SrcIdx, CompHasSrc2Acc));
}
}
}
assert(TiedIdx == -1 || TiedIdx == Component::DST);
HasSrc2Acc = TiedIdx != -1;
- SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs() - HasSrc2Acc;
+ SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
- auto OperandsNum = OpDesc.getNumOperands() - HasSrc2Acc;
+ auto OperandsNum = OpDesc.getNumOperands();
for (unsigned OprIdx = Component::SRC1; OprIdx < OperandsNum; ++OprIdx) {
if (OpDesc.OpInfo[OprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
MandatoryLiteralIdx = OprIdx;
auto SrcIdx = OprIdx - Component::DST_NUM;
if (SrcIdx < getSrcOperandsNum())
- return getParsedSrcIndex(SrcIdx);
+ return getParsedSrcIndex(SrcIdx, hasSrc2Acc());
// The specified operand does not exist.
return 0;
unsigned Src2Reg = 0;
if (Comp.hasRegularSrcOperand(2))
Src2Reg = GetRegIdx(ComponentIdx, Comp.getSrcIndex(2));
- else if (Comp.hasSrc2Acc())
- Src2Reg = DstReg;
return {DstReg, Src0Reg, Src1Reg, Src2Reg};
}
const auto &OpXDesc = InstrInfo->get(OpX);
const auto &OpYDesc = InstrInfo->get(OpY);
VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
- VOPD::ComponentInfo OpYInfo(OpYDesc, VOPD::ComponentKind::COMPONENT_Y,
- OpXInfo.getSrcOperandsNum());
+ VOPD::ComponentInfo OpYInfo(
+ OpYDesc, VOPD::ComponentKind::COMPONENT_Y, OpXInfo.getSrcOperandsNum(),
+ OpXInfo.getSrcOperandsNum() - OpXInfo.hasSrc2Acc());
return VOPD::InstInfo(OpXInfo, OpYInfo);
}
// OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
// Each ComponentKind has operand indices defined below.
- static constexpr unsigned PARSED_DST_IDX[] = {1, 1, 4 /* + OpXSrcNum */};
- static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {2, 2,
- 5 /* + OpXSrcNum */};
+ static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
+ 4 /* + ParsedOpXSrcNum */};
+ static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
+ 2, 2, 5 /* + ParsedOpXSrcNum */};
private:
ComponentKind Kind;
unsigned OpXSrcNum;
+ unsigned ParsedOpXSrcNum;
public:
- ComponentLayout(ComponentKind Kind_ = ComponentKind::SINGLE,
- unsigned OpXSrcNum_ = 0)
- : Kind(Kind_), OpXSrcNum(OpXSrcNum_) {
+ // OpXSrcNum is the number of source operands OpX has on the MCInst;
+ // ParsedOpXSrcNum is the number of sources OpX contributes to the parsed
+ // operand list. The two differ when OpX has a src2 tied to dst, which
+ // exists on the MCInst but is not written in the asm.
+ ComponentLayout(ComponentKind Kind = ComponentKind::SINGLE,
+ unsigned OpXSrcNum = 0, unsigned ParsedOpXSrcNum = 0)
+ : Kind(Kind), OpXSrcNum(OpXSrcNum), ParsedOpXSrcNum(ParsedOpXSrcNum) {
assert(Kind <= ComponentKind::MAX);
assert((Kind == ComponentKind::COMPONENT_Y) == (OpXSrcNum > 0));
+ // Dropping the tied src2 can only shrink the parsed count, never grow it.
+ assert(ParsedOpXSrcNum <= OpXSrcNum);
}
}
unsigned getParsedDstIndex() const {
- return PARSED_DST_IDX[Kind] + OpXSrcNum;
+ // Per-kind base slot from PARSED_DST_IDX, shifted by the number of
+ // sources OpX contributed to the parsed operand list.
+ return PARSED_DST_IDX[Kind] + ParsedOpXSrcNum;
}
- unsigned getParsedSrcIndex(unsigned SrcIdx) const {
+ // Map the SrcIdx-th MCInst source operand of this component to its index
+ // in the parsed operand list. ComponentHasSrc2Acc tells whether this
+ // component's src2 is an accumulator tied to dst.
+ unsigned getParsedSrcIndex(unsigned SrcIdx, bool ComponentHasSrc2Acc) const {
assert(SrcIdx < Component::MAX_SRC_NUM);
- return FIRST_PARSED_SRC_IDX[Kind] + OpXSrcNum + SrcIdx;
+ // FMAC and DOT2C have a src2 operand on the MCInst but not in the asm
+ // representation. src2 is tied to dst, so report the parsed dst instead.
+ if (ComponentHasSrc2Acc && SrcIdx == Component::MAX_SRC_NUM - 1)
+ return getParsedDstIndex();
+ return FIRST_PARSED_SRC_IDX[Kind] + ParsedOpXSrcNum + SrcIdx;
}
};
public:
+ // Builds the layout part from (Kind, OpXSrcNum, ParsedOpXSrcNum) and the
+ // properties part from OpDesc. All arguments after OpDesc are defaulted,
+ // so callers that pass only OpDesc still compile.
ComponentInfo(const MCInstrDesc &OpDesc,
ComponentKind Kind = ComponentKind::SINGLE,
- unsigned OpXSrcNum = 0)
- : ComponentLayout(Kind, OpXSrcNum), ComponentProps(OpDesc) {}
+ unsigned OpXSrcNum = 0, unsigned ParsedOpXSrcNum = 0)
+ : ComponentLayout(Kind, OpXSrcNum, ParsedOpXSrcNum),
+ ComponentProps(OpDesc) {}
// Map MC operand index to parsed operand index.
// Return 0 if the specified operand does not exist.
let InsVOP3Base = getIns64<Src0VOP3DPP, Src1RC64, RegisterOperand<VGPR_32>, 3,
0, HasModifiers, HasModifiers, HasOMod,
Src0Mod, Src1Mod, Src2Mod>.ret;
+ // A dummy src2 tied to dst exposes the accumulator read as an explicit
+ // operand, so that s_delay_alu insertion can track uses of that register.
+ let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
+ let InsVOPDXDeferred =
+ (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
+ VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
+ let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
+ let InsVOPDYDeferred =
+ (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
+ VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
let mayRaiseFPException = ReadsModeReg;
+ // V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 need a dummy src2 tied to dst for
+ // passes to track its uses. Its presence does not affect VOPD formation rules
+ // because the rules for src2 and dst are the same. src2X and src2Y should not
+ // be encoded.
+ bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"));
+ bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"));
+ string ConstraintsX = !if(hasSrc2AccX, "$src2X = $vdstX", "");
+ string ConstraintsY = !if(hasSrc2AccY, "$src2Y = $vdstY", "");
+ let Constraints =
+ ConstraintsX # !if(!and(hasSrc2AccX, hasSrc2AccY), ", ", "") # ConstraintsY;
+ string DisableEncodingX = !if(hasSrc2AccX, "$src2X", "");
+ string DisableEncodingY = !if(hasSrc2AccY, "$src2Y", "");
+ let DisableEncoding =
+ DisableEncodingX # !if(!and(hasSrc2AccX, hasSrc2AccY), ", ", "") # DisableEncodingY;
+
let Uses = RegListUnion<VDX.Uses, VDY.Uses>.ret;
let Defs = RegListUnion<VDX.Defs, VDY.Defs>.ret;
let SchedRW = !listconcat(VDX.SchedRW, VDY.SchedRW);
; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
- ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
; PAIR-NEXT: $sgpr20 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec
- ; PAIR-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32 $sgpr20, killed $vgpr1, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
+ ; PAIR-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
; PAIR-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc
; PAIR-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec
; PAIR-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc
; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
- ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
- ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
; PAIR-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
; PAIR-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
- ; PAIR-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32 $vgpr0, $vgpr3, 10, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
+ ; PAIR-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
; PAIR-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc
; PAIR-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
; PAIR-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
+++ /dev/null
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=gcn-create-vopd,amdgpu-insert-delay-alu %s -o - | FileCheck %s
-
-# FIXME: Second VOPD pair reads vgpr0 and vgpr1 written by first pair, so there
-# should be a delay.
----
-name: vopd_fmac_fmac
-tracksRegLiveness: true
-body: |
- bb.0:
- ; CHECK-LABEL: name: vopd_fmac_fmac
- ; CHECK: $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr3, $vgpr4, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
- ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr3, $vgpr4, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
- $vgpr0 = IMPLICIT_DEF
- $vgpr1 = IMPLICIT_DEF
- $vgpr2 = IMPLICIT_DEF
- $vgpr3 = IMPLICIT_DEF
- $vgpr4 = IMPLICIT_DEF
- $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
- $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
- $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
- $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
-...
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=gcn-create-vopd,amdgpu-insert-delay-alu %s -o - | FileCheck %s
+
+---
+name: vopd_fmac_fmac
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vopd_fmac_fmac
+ ; CHECK: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU 1
+ ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = IMPLICIT_DEF
+ $vgpr2 = IMPLICIT_DEF
+ $vgpr3 = IMPLICIT_DEF
+ $vgpr4 = IMPLICIT_DEF
+ $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+ $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+...
+---
+name: vopd_dot2c_dot2c
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vopd_dot2c_dot2c
+ ; CHECK: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU 1
+ ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = IMPLICIT_DEF
+ $vgpr2 = IMPLICIT_DEF
+ $vgpr3 = IMPLICIT_DEF
+ $vgpr4 = IMPLICIT_DEF
+ $vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+ $vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+...
v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3
// GFX11: error: src2 operands must use different VGPR banks
// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3
-// GFX11-NEXT:{{^}} ^
+// GFX11-NEXT:{{^}} ^