return true;
}
+bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
+ const OperandsMapper &OpdMapper) const {
+ MachineInstr &MI = OpdMapper.getMI();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
+
+ // Insert basic copies.
+ applyDefaultMapping(OpdMapper);
+
+ Register Dst0 = MI.getOperand(0).getReg();
+ Register Dst1 = MI.getOperand(1).getReg();
+ Register Src0 = MI.getOperand(2).getReg();
+ Register Src1 = MI.getOperand(3).getReg();
+ Register Src2 = MI.getOperand(4).getReg();
+
+ if (MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
+ return true;
+
+ bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
+ LLT S1 = LLT::scalar(1);
+ LLT S32 = LLT::scalar(32);
+
+ bool DstOnValu = MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
+ bool Accumulate = true;
+
+ if (!DstOnValu) {
+ if (mi_match(Src2, MRI, m_ZeroInt()))
+ Accumulate = false;
+ }
+
+ // Keep the multiplication on the SALU.
+ MachineIRBuilder B(MI);
+
+ Register DstHi;
+ Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0);
+ bool MulHiInVgpr = false;
+
+ MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);
+
+ if (Subtarget.hasSMulHi()) {
+ DstHi = IsUnsigned ? B.buildUMulH(S32, Src0, Src1).getReg(0)
+ : B.buildSMulH(S32, Src0, Src1).getReg(0);
+ MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
+ } else {
+ Register VSrc0 = B.buildCopy(S32, Src0).getReg(0);
+ Register VSrc1 = B.buildCopy(S32, Src1).getReg(0);
+
+ MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
+ MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);
+
+ DstHi = IsUnsigned ? B.buildUMulH(S32, VSrc0, VSrc1).getReg(0)
+ : B.buildSMulH(S32, VSrc0, VSrc1).getReg(0);
+ MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
+
+ if (!DstOnValu) {
+ DstHi = buildReadFirstLane(B, MRI, DstHi);
+ } else {
+ MulHiInVgpr = true;
+ }
+ }
+
+ // Accumulate and produce the "carry-out" bit.
+ //
+ // The "carry-out" is defined as bit 64 of the result when computed as a
+ // big integer. For unsigned multiply-add, this matches the usual definition
+ // of carry-out. For signed multiply-add, bit 64 is the sign bit of the
+ // result, which is determined as:
+ // sign(Src0 * Src1) + sign(Src2) + carry-out from unsigned 64-bit add
+ LLT CarryType = DstOnValu ? S1 : S32;
+ const RegisterBank &CarryBank =
+ DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
+ const RegisterBank &DstBank =
+ DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
+ Register Carry;
+ Register Zero;
+
+ if (!IsUnsigned) {
+ Zero = B.buildConstant(S32, 0).getReg(0);
+ MRI.setRegBank(Zero,
+ MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
+
+ Carry = B.buildICmp(CmpInst::ICMP_SLT, MulHiInVgpr ? S1 : S32, DstHi, Zero)
+ .getReg(0);
+ MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
+ : AMDGPU::SGPRRegBank);
+
+ if (DstOnValu && !MulHiInVgpr) {
+ Carry = B.buildTrunc(S1, Carry).getReg(0);
+ MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
+ }
+ }
+
+ if (Accumulate) {
+ if (DstOnValu) {
+ DstLo = B.buildCopy(S32, DstLo).getReg(0);
+ DstHi = B.buildCopy(S32, DstHi).getReg(0);
+ MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
+ MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
+ }
+
+ auto Unmerge = B.buildUnmerge(S32, Src2);
+ Register Src2Lo = Unmerge.getReg(0);
+ Register Src2Hi = Unmerge.getReg(1);
+ MRI.setRegBank(Src2Lo, DstBank);
+ MRI.setRegBank(Src2Hi, DstBank);
+
+ if (!IsUnsigned) {
+ auto Src2Sign = B.buildICmp(CmpInst::ICMP_SLT, CarryType, Src2Hi, Zero);
+ MRI.setRegBank(Src2Sign.getReg(0), CarryBank);
+
+ Carry = B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
+ MRI.setRegBank(Carry, CarryBank);
+ }
+
+ auto AddLo = B.buildUAddo(S32, CarryType, DstLo, Src2Lo);
+ DstLo = AddLo.getReg(0);
+ Register CarryLo = AddLo.getReg(1);
+ MRI.setRegBank(DstLo, DstBank);
+ MRI.setRegBank(CarryLo, CarryBank);
+
+ auto AddHi = B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo);
+ DstHi = AddHi.getReg(0);
+ MRI.setRegBank(DstHi, DstBank);
+
+ Register CarryHi = AddHi.getReg(1);
+ MRI.setRegBank(CarryHi, CarryBank);
+
+ if (IsUnsigned) {
+ Carry = CarryHi;
+ } else {
+ Carry = B.buildXor(CarryType, Carry, CarryHi).getReg(0);
+ MRI.setRegBank(Carry, CarryBank);
+ }
+ } else {
+ if (IsUnsigned) {
+ Carry = B.buildConstant(CarryType, 0).getReg(0);
+ MRI.setRegBank(Carry, CarryBank);
+ }
+ }
+
+ B.buildMerge(Dst0, {DstLo, DstHi});
+
+ if (DstOnValu) {
+ B.buildCopy(Dst1, Carry);
+ } else {
+ B.buildTrunc(Dst1, Carry);
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
// Return a suitable opcode for extending the operands of Opc when widening.
static unsigned getExtendOp(unsigned Opc) {
switch (Opc) {
case AMDGPU::G_UBFX:
applyMappingBFE(OpdMapper, /*Signed*/ false);
return;
+ case AMDGPU::G_AMDGPU_MAD_U64_U32:
+ case AMDGPU::G_AMDGPU_MAD_I64_I32:
+ applyMappingMAD_64_32(OpdMapper);
+ return;
default:
break;
}
return getDefaultMappingSOP(MI);
return getDefaultMappingVOP(MI);
}
+ case AMDGPU::G_AMDGPU_MAD_U64_U32:
+ case AMDGPU::G_AMDGPU_MAD_I64_I32: {
+ // Three possible mappings:
+ //
+ // - Default SOP
+ // - Default VOP
+ // - Scalar multiply: src0 and src1 are SGPRs, the rest is VOP.
+ //
+ // This allows instruction selection to keep the multiplication part of the
+ // instruction on the SALU.
+ bool AllSalu = true;
+ bool MulSalu = true;
+ for (unsigned i = 0; i < 5; ++i) {
+ Register Reg = MI.getOperand(i).getReg();
+ if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
+ if (Bank->getID() != AMDGPU::SGPRRegBankID) {
+ AllSalu = false;
+ if (i == 2 || i == 3) {
+ MulSalu = false;
+ break;
+ }
+ }
+ }
+ }
+
+ if (AllSalu)
+ return getDefaultMappingSOP(MI);
+
+ // If the multiply-add is full-rate in VALU, use that even if the
+ // multiplication part is scalar. Accumulating separately on the VALU would
+ // take two instructions.
+ if (!MulSalu || Subtarget.hasFullRate64Ops())
+ return getDefaultMappingVOP(MI);
+
+ // Keep the multiplication on the SALU, then accumulate on the VALU.
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
+ OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
+ break;
+ }
case AMDGPU::G_IMPLICIT_DEF: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX8 %s
+# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX9MI %s
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX10 %s
+
+---
+name: mad_u64_u32_sss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+ ;
+ ;
+ ; GFX8-LABEL: name: mad_u64_u32_sss
+ ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY4]], [[COPY5]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
+ ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]]
+ ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32)
+ ; GFX9MI-LABEL: name: mad_u64_u32_sss
+ ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+ ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
+ ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]]
+ ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32)
+ ; GFX10-LABEL: name: mad_u64_u32_sss
+ ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
+ ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]]
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s32) = COPY $sgpr3
+ %4:_(s64) = G_MERGE_VALUES %2, %3
+ %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
+...
+
+---
+name: mad_u64_u32_ssv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ ;
+ ;
+ ; GFX8-LABEL: name: mad_u64_u32_ssv
+ ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]]
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32)
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32)
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]]
+ ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]]
+ ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1)
+ ; GFX9MI-LABEL: name: mad_u64_u32_ssv
+ ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]]
+ ; GFX10-LABEL: name: mad_u64_u32_ssv
+ ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32)
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32)
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]]
+ ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]]
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $vgpr0
+ %3:_(s32) = COPY $vgpr1
+ %4:_(s64) = G_MERGE_VALUES %2, %3
+ %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
+...
+
+---
+name: mad_u64_u32_svs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2
+ ;
+ ;
+ ; CHECK-LABEL: name: mad_u64_u32_svs
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64)
+ ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[COPY5]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s32) = COPY $sgpr1
+ %3:_(s32) = COPY $sgpr2
+ %4:_(s64) = G_MERGE_VALUES %2, %3
+ %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
+...
+
+---
+name: mad_u64_u32_svv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
+ ;
+ ;
+ ; CHECK-LABEL: name: mad_u64_u32_svv
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[MV]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s32) = COPY $vgpr1
+ %3:_(s32) = COPY $vgpr2
+ %4:_(s64) = G_MERGE_VALUES %2, %3
+ %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
+...
+
+---
+name: mad_u64_u32_vss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2
+ ;
+ ;
+ ; CHECK-LABEL: name: mad_u64_u32_vss
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64)
+ ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[COPY5]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s32) = COPY $sgpr1
+ %3:_(s32) = COPY $sgpr2
+ %4:_(s64) = G_MERGE_VALUES %2, %3
+ %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
+...
+
+---
+name: mad_u64_u32_vsv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr0, $vgpr1, $vgpr2
+ ;
+ ;
+ ; CHECK-LABEL: name: mad_u64_u32_vsv
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[MV]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $vgpr1
+ %3:_(s32) = COPY $vgpr2
+ %4:_(s64) = G_MERGE_VALUES %2, %3
+ %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
+...
+
+---
+name: mad_u64_u32_vvs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1
+ ;
+ ;
+ ; CHECK-LABEL: name: mad_u64_u32_vvs
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64)
+ ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY4]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $sgpr1
+ %3:_(s32) = COPY $sgpr2
+ %4:_(s64) = G_MERGE_VALUES %2, %3
+ %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
+...
+
+---
+name: mad_u64_u32_vvv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ;
+ ;
+ ; CHECK-LABEL: name: mad_u64_u32_vvv
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[MV]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr3
+ %4:_(s64) = G_MERGE_VALUES %2, %3
+ %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
+...
+
+---
+name: mad_i64_i32_sss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+ ;
+ ;
+ ; GFX8-LABEL: name: mad_i64_i32_sss
+ ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY4]], [[COPY5]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec
+ ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C]]
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
+ ; GFX8-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]]
+ ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
+ ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]]
+ ; GFX8-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]]
+ ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32)
+ ; GFX9MI-LABEL: name: mad_i64_i32_sss
+ ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+ ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]]
+ ; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; GFX9MI-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
+ ; GFX9MI-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]]
+ ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
+ ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]]
+ ; GFX9MI-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]]
+ ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32)
+ ; GFX10-LABEL: name: mad_i64_i32_sss
+ ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]]
+ ; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
+ ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]]
+ ; GFX10-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]]
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s32) = COPY $sgpr3
+ %4:_(s64) = G_MERGE_VALUES %2, %3
+ %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %4
+...
+
+---
+name: mad_i64_i32_ssv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ ;
+ ;
+ ; GFX8-LABEL: name: mad_i64_i32_ssv
+ ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY4]], [[COPY5]]
+ ; GFX8-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]]
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32)
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32)
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
+ ; GFX8-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[ICMP1]]
+ ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]]
+ ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]]
+ ; GFX8-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]]
+ ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1)
+ ; GFX9MI-LABEL: name: mad_i64_i32_ssv
+ ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; GFX9MI-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY4]](s32), [[COPY5]], [[MV]]
+ ; GFX10-LABEL: name: mad_i64_i32_ssv
+ ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]]
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vcc(s1) = G_TRUNC [[ICMP]](s32)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32)
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32)
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[TRUNC]], [[ICMP1]]
+ ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]]
+ ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]]
+ ; GFX10-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]]
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $vgpr0
+ %3:_(s32) = COPY $vgpr1
+ %4:_(s64) = G_MERGE_VALUES %2, %3
+ %5:_(s64), %6:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %4
+...
+
+---
+name: mad_u64_u32_ss0
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ;
+ ;
+ ; GFX8-LABEL: name: mad_u64_u32_ss0
+ ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
+ ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY2]], [[COPY3]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[V_READFIRSTLANE_B32_]](s32)
+ ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32)
+ ; GFX9MI-LABEL: name: mad_u64_u32_ss0
+ ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
+ ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+ ; GFX9MI-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[UMULH]](s32)
+ ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32)
+ ; GFX10-LABEL: name: mad_u64_u32_ss0
+ ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
+ ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[UMULH]](s32)
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2
+...
+
+---
+name: mad_u64_u32_vv0
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ;
+ ;
+ ; CHECK-LABEL: name: mad_u64_u32_vv0
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64)
+ ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY2]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2
+...
+
+---
+name: mad_i64_i32_ss0
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ;
+ ;
+ ; GFX8-LABEL: name: mad_i64_i32_ss0
+ ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
+ ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY2]], [[COPY3]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C1]]
+ ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[V_READFIRSTLANE_B32_]](s32)
+ ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
+ ; GFX9MI-LABEL: name: mad_i64_i32_ss0
+ ; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
+ ; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+ ; GFX9MI-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C1]]
+ ; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[SMULH]](s32)
+ ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
+ ; GFX10-LABEL: name: mad_i64_i32_ss0
+ ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
+ ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C1]]
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[SMULH]](s32)
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %2
+...
+
+---
+name: mad_i64_i32_vv0
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ;
+ ;
+ ; CHECK-LABEL: name: mad_i64_i32_vv0
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64)
+ ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY]](s32), [[COPY1]], [[COPY2]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64), %4:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %2
+...