Use the algorithm from AMDGPUCodeGenPrepare::expandDivRem32.
Differential Revision: https://reviews.llvm.org/D83383
return false;
}
-static Register buildDivRCP(MachineIRBuilder &B, Register Src) {
- const LLT S32 = LLT::scalar(32);
-
- auto Cvt0 = B.buildUITOFP(S32, Src);
- auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Cvt0});
- auto FPUIntMaxPlus1 = B.buildFConstant(S32, BitsToFloat(0x4f800000));
- auto Mul = B.buildFMul(S32, RcpIFlag, FPUIntMaxPlus1);
- return B.buildFPTOUI(S32, Mul).getReg(0);
-}
-
void AMDGPULegalizerInfo::legalizeUDIV_UREM32Impl(MachineIRBuilder &B,
Register DstReg,
- Register Num,
- Register Den,
+ Register X,
+ Register Y,
bool IsDiv) const {
const LLT S1 = LLT::scalar(1);
const LLT S32 = LLT::scalar(32);
- // RCP = URECIP(Den) = 2^32 / Den + e
- // e is rounding error.
- auto RCP = buildDivRCP(B, Den);
-
- // RCP_LO = mul(RCP, Den)
- auto RCP_LO = B.buildMul(S32, RCP, Den);
-
- // RCP_HI = mulhu (RCP, Den) */
- auto RCP_HI = B.buildUMulH(S32, RCP, Den);
-
- // NEG_RCP_LO = -RCP_LO
- auto Zero = B.buildConstant(S32, 0);
- auto NEG_RCP_LO = B.buildSub(S32, Zero, RCP_LO);
-
- // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
- auto CmpRcpHiZero = B.buildICmp(CmpInst::ICMP_EQ, S1, RCP_HI, Zero);
- auto ABS_RCP_LO = B.buildSelect(S32, CmpRcpHiZero, NEG_RCP_LO, RCP_LO);
-
- // Calculate the rounding error from the URECIP instruction
- // E = mulhu(ABS_RCP_LO, RCP)
- auto E = B.buildUMulH(S32, ABS_RCP_LO, RCP);
-
- // RCP_A_E = RCP + E
- auto RCP_A_E = B.buildAdd(S32, RCP, E);
-
- // RCP_S_E = RCP - E
- auto RCP_S_E = B.buildSub(S32, RCP, E);
-
- // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
- auto Tmp0 = B.buildSelect(S32, CmpRcpHiZero, RCP_A_E, RCP_S_E);
-
- // Quotient = mulhu(Tmp0, Num)stmp
- auto Quotient = B.buildUMulH(S32, Tmp0, Num);
-
- // Num_S_Remainder = Quotient * Den
- auto Num_S_Remainder = B.buildMul(S32, Quotient, Den);
+ // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the
+ // algorithm used here.
- // Remainder = Num - Num_S_Remainder
- auto Remainder = B.buildSub(S32, Num, Num_S_Remainder);
+ // Initial estimate of inv(y).
+ auto FloatY = B.buildUITOFP(S32, Y);
+ auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {FloatY});
+ auto Scale = B.buildFConstant(S32, BitsToFloat(0x4f7ffffe));
+ auto ScaledY = B.buildFMul(S32, RcpIFlag, Scale);
+ auto Z = B.buildFPTOUI(S32, ScaledY);
- // Remainder_GE_Den = Remainder >= Den
- auto Remainder_GE_Den = B.buildICmp(CmpInst::ICMP_UGE, S1, Remainder, Den);
+ // One round of UNR (unsigned integer Newton-Raphson) to refine the estimate.
+ auto NegY = B.buildSub(S32, B.buildConstant(S32, 0), Y);
+ auto NegYZ = B.buildMul(S32, NegY, Z);
+ Z = B.buildAdd(S32, Z, B.buildUMulH(S32, Z, NegYZ));
- // Remainder_GE_Zero = Num >= Num_S_Remainder;
- auto Remainder_GE_Zero = B.buildICmp(CmpInst::ICMP_UGE, S1,
- Num, Num_S_Remainder);
+ // Quotient/remainder estimate.
+ auto Q = B.buildUMulH(S32, X, Z);
+ auto R = B.buildSub(S32, X, B.buildMul(S32, Q, Y));
- // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
- auto Tmp1 = B.buildAnd(S1, Remainder_GE_Den, Remainder_GE_Zero);
-
- // Calculate Division result:
-
- // Quotient_A_One = Quotient + 1
+ // First quotient/remainder refinement.
auto One = B.buildConstant(S32, 1);
- auto Quotient_A_One = B.buildAdd(S32, Quotient, One);
-
- // Quotient_S_One = Quotient - 1
- auto Quotient_S_One = B.buildSub(S32, Quotient, One);
-
- // Div = (Tmp1 ? Quotient_A_One : Quotient)
- auto Div = B.buildSelect(S32, Tmp1, Quotient_A_One, Quotient);
-
- // Div = (Remainder_GE_Zero ? Div : Quotient_S_One)
- if (IsDiv) {
- B.buildSelect(DstReg, Remainder_GE_Zero, Div, Quotient_S_One);
- } else {
- Div = B.buildSelect(S32, Remainder_GE_Zero, Div, Quotient_S_One);
-
- // Calculate Rem result:
- auto Remainder_S_Den = B.buildSub(S32, Remainder, Den);
-
- // Remainder_A_Den = Remainder + Den
- auto Remainder_A_Den = B.buildAdd(S32, Remainder, Den);
-
- // Rem = (Tmp1 ? Remainder_S_Den : Remainder)
- auto Rem = B.buildSelect(S32, Tmp1, Remainder_S_Den, Remainder);
+ auto Cond = B.buildICmp(CmpInst::ICMP_UGE, S1, R, Y);
+ if (IsDiv)
+ Q = B.buildSelect(S32, Cond, B.buildAdd(S32, Q, One), Q);
+ R = B.buildSelect(S32, Cond, B.buildSub(S32, R, Y), R);
- // Rem = (Remainder_GE_Zero ? Rem : Remainder_A_Den)
- B.buildSelect(DstReg, Remainder_GE_Zero, Rem, Remainder_A_Den);
- }
+ // Second quotient/remainder refinement.
+ Cond = B.buildICmp(CmpInst::ICMP_UGE, S1, R, Y);
+ if (IsDiv)
+ B.buildSelect(DstReg, Cond, B.buildAdd(S32, Q, One), Q);
+ else
+ B.buildSelect(DstReg, Cond, B.buildSub(S32, R, Y), R);
}
bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX6: $vgpr0 = COPY [[SUB4]](s32)
+ ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX6: $vgpr0 = COPY [[SUB3]](s32)
; GFX8-LABEL: name: test_sdiv_s32
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX8: $vgpr0 = COPY [[SUB4]](s32)
+ ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX8: $vgpr0 = COPY [[SUB3]](s32)
; GFX9-LABEL: name: test_sdiv_s32
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX9: $vgpr0 = COPY [[SUB4]](s32)
+ ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX9: $vgpr0 = COPY [[SUB3]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_SDIV %0, %1
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
- ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
- ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
- ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+ ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+ ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]]
+ ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]]
; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
- ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
- ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
- ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
- ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
- ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
- ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
- ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
- ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
- ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]]
- ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]]
- ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]]
- ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+ ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]]
+ ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]]
+ ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]]
+ ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+ ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]]
+ ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]]
+ ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]]
+ ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]]
+ ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]]
+ ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]]
+ ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]]
; GFX6: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
- ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
- ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
- ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
+ ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]]
+ ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+ ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX8-LABEL: name: test_sdiv_v2s32
; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
- ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
- ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
- ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+ ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+ ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]]
+ ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]]
; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
- ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
- ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
- ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
- ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
- ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
- ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
- ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
- ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
- ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]]
- ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]]
- ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]]
- ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+ ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]]
+ ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]]
+ ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]]
+ ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+ ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]]
+ ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]]
+ ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]]
+ ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]]
+ ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]]
+ ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]]
+ ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]]
; GFX8: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
- ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
- ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
- ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
+ ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]]
+ ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+ ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX9-LABEL: name: test_sdiv_v2s32
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
- ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
- ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
- ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+ ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+ ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]]
+ ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]]
; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
- ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
- ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
- ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
- ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
- ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
- ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
- ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
- ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
- ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]]
- ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]]
- ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]]
- ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+ ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]]
+ ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]]
+ ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+ ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]]
+ ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]]
+ ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]]
+ ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]]
+ ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]]
+ ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]]
+ ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]]
; GFX9: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
- ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
- ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
+ ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]]
+ ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+ ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
; GFX6: $vgpr0 = COPY [[COPY4]](s32)
; GFX8-LABEL: name: test_sdiv_s16
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+ ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
; GFX8: $vgpr0 = COPY [[COPY4]](s32)
; GFX9-LABEL: name: test_sdiv_s16
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+ ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
; GFX9: $vgpr0 = COPY [[COPY4]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
- ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
- ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
- ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+ ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+ ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]]
+ ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]]
; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]]
; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
- ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
- ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
- ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
- ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
- ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
- ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
- ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]]
- ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]]
- ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]]
- ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+ ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]]
+ ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]]
+ ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]]
+ ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+ ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]]
+ ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
+ ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]]
+ ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]]
+ ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]]
+ ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
+ ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]]
; GFX6: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
- ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
- ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+ ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]]
+ ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
- ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
- ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
- ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
- ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
+ ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
+ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+ ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB7]](s32)
+ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; GFX8-LABEL: name: test_sdiv_v2s16
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX8: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
- ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
- ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
- ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+ ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+ ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]]
+ ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]]
; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]]
; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
- ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
- ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
- ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
- ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
- ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
- ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
- ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]]
- ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]]
- ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]]
- ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+ ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]]
+ ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]]
+ ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]]
+ ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+ ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]]
+ ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
+ ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]]
+ ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]]
+ ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]]
+ ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
+ ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]]
; GFX8: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
- ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
- ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+ ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]]
+ ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
- ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
- ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
- ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
- ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
+ ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
+ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+ ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB7]](s32)
+ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+ ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; GFX9-LABEL: name: test_sdiv_v2s16
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
- ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
- ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
- ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+ ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+ ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]]
+ ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]]
; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]]
; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
- ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
- ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
- ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
- ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
- ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
- ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
- ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]]
- ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]]
- ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]]
- ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+ ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]]
+ ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]]
+ ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+ ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]]
+ ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
+ ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]]
+ ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]]
+ ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]]
+ ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
+ ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]]
; GFX9: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
- ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
- ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
- ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
- ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
+ ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]]
+ ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB7]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+ ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
; GFX6: $vgpr0 = COPY [[COPY4]](s32)
; GFX8-LABEL: name: test_sdiv_s7
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+ ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
; GFX8: $vgpr0 = COPY [[COPY4]](s32)
; GFX9-LABEL: name: test_sdiv_s7
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+ ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
; GFX9: $vgpr0 = COPY [[COPY4]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+ ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
; GFX6: $vgpr0 = COPY [[COPY4]](s32)
; GFX8-LABEL: name: test_sdiv_s17
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+ ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
; GFX8: $vgpr0 = COPY [[COPY4]](s32)
; GFX9-LABEL: name: test_sdiv_s17
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
+ ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
- ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
- ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+ ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32)
; GFX9: $vgpr0 = COPY [[COPY4]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX6: $vgpr0 = COPY [[SUB4]](s32)
; GFX8-LABEL: name: test_srem_s32
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX8: $vgpr0 = COPY [[SUB4]](s32)
; GFX9-LABEL: name: test_srem_s32
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX9: $vgpr0 = COPY [[SUB4]](s32)
%0:_(s32) = COPY $vgpr0
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
- ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
- ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
- ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+ ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+ ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]]
+ ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]]
; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]]
; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
- ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
- ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
- ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
- ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
- ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
- ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
- ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
- ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
- ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
- ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
- ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
- ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
- ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+ ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]]
+ ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]]
+ ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]]
+ ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]]
+ ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+ ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]]
+ ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]]
+ ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]]
+ ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]]
+ ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]]
+ ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]]
; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
- ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
- ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
- ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+ ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+ ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]]
+ ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]]
; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]]
; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
- ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
- ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
- ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
- ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
- ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
- ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
- ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
- ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
- ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
- ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
- ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
- ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
- ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+ ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]]
+ ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]]
+ ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]]
+ ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]]
+ ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+ ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]]
+ ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]]
+ ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]]
+ ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]]
+ ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]]
+ ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]]
; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
- ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
- ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
- ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+ ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+ ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]]
+ ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]]
; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]]
; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
- ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
- ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
- ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
- ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
- ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
- ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
- ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
- ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
- ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
- ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
- ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
- ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
- ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+ ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]]
+ ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]]
+ ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]]
+ ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+ ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]]
+ ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]]
+ ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]]
+ ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]]
+ ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]]
+ ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]]
; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
- ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; GFX6: $vgpr0 = COPY [[AND1]](s32)
+ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; GFX6: $vgpr0 = COPY [[AND]](s32)
; GFX8-LABEL: name: test_srem_s16
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
- ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; GFX8: $vgpr0 = COPY [[AND1]](s32)
+ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; GFX8: $vgpr0 = COPY [[AND]](s32)
; GFX9-LABEL: name: test_srem_s16
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
- ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; GFX9: $vgpr0 = COPY [[AND1]](s32)
+ ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; GFX9: $vgpr0 = COPY [[AND]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
- ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
- ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
- ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+ ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+ ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]]
+ ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]]
; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
- ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
- ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
- ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
- ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
- ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
- ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
- ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
- ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
- ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
- ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
- ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+ ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]]
+ ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]]
+ ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]]
+ ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]]
+ ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+ ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]]
+ ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]]
+ ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]]
+ ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]]
+ ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]]
+ ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]]
; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
- ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
- ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
- ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
+ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; GFX8-LABEL: name: test_srem_v2s16
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX8: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
- ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
- ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
- ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+ ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+ ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]]
+ ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]]
; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
- ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
- ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
- ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
- ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
- ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
- ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
- ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
- ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
- ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
- ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
- ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+ ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]]
+ ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]]
+ ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]]
+ ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]]
+ ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+ ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]]
+ ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]]
+ ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]]
+ ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]]
+ ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]]
+ ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]]
; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
- ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
- ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
- ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
+ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
+ ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; GFX9-LABEL: name: test_srem_v2s16
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
- ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
- ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
- ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
- ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+ ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+ ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]]
+ ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]]
; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
- ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
- ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
- ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
- ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
- ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
- ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
- ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
- ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
- ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
- ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
- ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
- ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
- ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+ ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]]
+ ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]]
+ ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]]
+ ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+ ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]]
+ ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]]
+ ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]]
+ ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]]
+ ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]]
+ ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]]
; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
; GFX6: $vgpr0 = COPY [[COPY4]](s32)
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
; GFX8: $vgpr0 = COPY [[COPY4]](s32)
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
; GFX9: $vgpr0 = COPY [[COPY4]](s32)
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
; GFX6: $vgpr0 = COPY [[COPY4]](s32)
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
; GFX8: $vgpr0 = COPY [[COPY4]](s32)
; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
- ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
; GFX9: $vgpr0 = COPY [[COPY4]](s32)
; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]]
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX6: $vgpr0 = COPY [[SELECT3]](s32)
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]]
+ ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX6: $vgpr0 = COPY [[SELECT2]](s32)
; GFX8-LABEL: name: test_udiv_s32
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]]
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX8: $vgpr0 = COPY [[SELECT3]](s32)
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]]
+ ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX8: $vgpr0 = COPY [[SELECT2]](s32)
; GFX9-LABEL: name: test_udiv_s32
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]]
; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX9: $vgpr0 = COPY [[SELECT3]](s32)
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX9: $vgpr0 = COPY [[SELECT2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_UDIV %0, %1
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]]
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]]
+ ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]]
; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]]
- ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]]
- ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]]
- ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]]
- ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]]
- ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]]
- ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
- ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]]
- ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]]
- ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C2]]
- ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C2]]
- ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD3]], [[UMULH5]]
- ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]]
- ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32)
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]]
+ ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
+ ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]]
+ ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]]
+ ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
+ ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]]
+ ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]]
+ ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
+ ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]]
+ ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
+ ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]]
+ ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]]
+ ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
+ ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32)
; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX8-LABEL: name: test_udiv_v2s32
; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]]
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]]
+ ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]]
; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]]
- ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]]
- ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]]
- ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]]
- ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]]
- ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]]
- ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
- ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]]
- ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]]
- ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C2]]
- ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C2]]
- ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD3]], [[UMULH5]]
- ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]]
- ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32)
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]]
+ ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
+ ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]]
+ ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]]
+ ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
+ ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]]
+ ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]]
+ ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
+ ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]]
+ ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
+ ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]]
+ ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]]
+ ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
+ ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32)
; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX9-LABEL: name: test_udiv_v2s32
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]]
; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]]
; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]]
- ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]]
- ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]]
- ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]]
- ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]]
- ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]]
- ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
- ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]]
- ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]]
- ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C2]]
- ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C2]]
- ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD3]], [[UMULH5]]
- ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]]
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32)
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
+ ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]]
+ ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]]
+ ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
+ ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]]
+ ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]]
+ ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
+ ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]]
+ ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
+ ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]]
+ ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]]
+ ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32)
; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
- ; GFX6: $vgpr0 = COPY [[AND3]](s32)
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
+ ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+ ; GFX6: $vgpr0 = COPY [[AND2]](s32)
; GFX8-LABEL: name: test_udiv_s16
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
- ; GFX8: $vgpr0 = COPY [[AND3]](s32)
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
+ ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+ ; GFX8: $vgpr0 = COPY [[AND2]](s32)
; GFX9-LABEL: name: test_udiv_s16
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
- ; GFX9: $vgpr0 = COPY [[AND3]](s32)
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
+ ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+ ; GFX9: $vgpr0 = COPY [[AND2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+ ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
- ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
- ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32)
+ ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+ ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]]
; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]]
- ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]]
- ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]]
- ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]]
- ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]]
- ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]]
- ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]]
- ; GFX6: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]]
- ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]]
- ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[ADD3]], [[UMULH5]]
- ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]]
- ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
- ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32)
- ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
- ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+ ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
+ ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]]
+ ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
+ ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
+ ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]]
+ ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
+ ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
+ ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]]
+ ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
+ ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]]
+ ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
+ ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
+ ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
+ ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+ ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32)
+ ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; GFX8-LABEL: name: test_udiv_v2s16
; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+ ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
- ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
- ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32)
+ ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+ ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]]
; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]]
- ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]]
- ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]]
- ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]]
- ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]]
- ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]]
- ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]]
- ; GFX8: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]]
- ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]]
- ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[ADD3]], [[UMULH5]]
- ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]]
- ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
- ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32)
- ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
- ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+ ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
+ ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]]
+ ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
+ ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
+ ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]]
+ ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
+ ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
+ ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]]
+ ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
+ ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]]
+ ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
+ ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
+ ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
+ ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+ ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32)
+ ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+ ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; GFX9-LABEL: name: test_udiv_v2s16
; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+ ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
- ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
- ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32)
+ ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+ ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]]
; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]]
- ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]]
- ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]]
- ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]]
- ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]]
- ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]]
- ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]]
- ; GFX9: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]]
- ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]]
- ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[ADD3]], [[UMULH5]]
- ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]]
- ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32)
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]]
+ ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]]
+ ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
+ ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
+ ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]]
+ ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]]
+ ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]]
+ ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]]
+ ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]]
+ ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]]
+ ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]]
+ ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]]
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
; GFX6: $vgpr0 = COPY [[COPY4]](s32)
; GFX8-LABEL: name: test_udiv_s7
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
; GFX8: $vgpr0 = COPY [[COPY4]](s32)
; GFX9-LABEL: name: test_udiv_s7
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
; GFX9: $vgpr0 = COPY [[COPY4]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
; GFX6: $vgpr0 = COPY [[COPY4]](s32)
; GFX8-LABEL: name: test_udiv_s17
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
; GFX8: $vgpr0 = COPY [[COPY4]](s32)
; GFX9-LABEL: name: test_udiv_s17
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32)
; GFX9: $vgpr0 = COPY [[COPY4]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]]
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[COPY1]]
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[COPY1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX6: $vgpr0 = COPY [[SELECT3]](s32)
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: $vgpr0 = COPY [[SELECT1]](s32)
; GFX8-LABEL: name: test_urem_s32
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]]
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[COPY1]]
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[COPY1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX8: $vgpr0 = COPY [[SELECT3]](s32)
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: $vgpr0 = COPY [[SELECT1]](s32)
; GFX9-LABEL: name: test_urem_s32
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]]
; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[COPY1]]
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[COPY1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX9: $vgpr0 = COPY [[SELECT3]](s32)
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX9: $vgpr0 = COPY [[SELECT1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_UREM %0, %1
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]]
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]]
- ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[UV2]]
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[UV2]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]]
- ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]]
- ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]]
- ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]]
- ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]]
- ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]]
- ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
- ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]]
- ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]]
- ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[UV3]]
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[UV3]]
- ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB7]], [[SUB6]]
- ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]]
- ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32)
+ ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]]
+ ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]]
+ ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]]
+ ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
+ ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]]
+ ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]]
+ ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]]
+ ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]]
+ ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32)
; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX8-LABEL: name: test_urem_v2s32
; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]]
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]]
- ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[UV2]]
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[UV2]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]]
- ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]]
- ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]]
- ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]]
- ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]]
- ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]]
- ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
- ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]]
- ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]]
- ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[UV3]]
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[UV3]]
- ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB7]], [[SUB6]]
- ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]]
- ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32)
+ ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]]
+ ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]]
+ ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]]
+ ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
+ ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]]
+ ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]]
+ ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]]
+ ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]]
+ ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32)
; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX9-LABEL: name: test_urem_v2s32
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]]
; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]]
- ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[UV2]]
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[UV2]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]]
; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]]
- ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]]
- ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]]
- ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]]
- ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]]
- ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]]
- ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
- ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]]
- ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]]
- ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[UV3]]
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[UV3]]
- ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB7]], [[SUB6]]
- ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]]
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32)
+ ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]]
+ ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]]
+ ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]]
+ ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]]
+ ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]]
+ ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]]
+ ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]]
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32)
; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
- ; GFX6: $vgpr0 = COPY [[AND3]](s32)
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
+ ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+ ; GFX6: $vgpr0 = COPY [[AND2]](s32)
; GFX8-LABEL: name: test_urem_s16
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
- ; GFX8: $vgpr0 = COPY [[AND3]](s32)
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
+ ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+ ; GFX8: $vgpr0 = COPY [[AND2]](s32)
; GFX9-LABEL: name: test_urem_s16
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
- ; GFX9: $vgpr0 = COPY [[AND3]](s32)
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
+ ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+ ; GFX9: $vgpr0 = COPY [[AND2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+ ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
- ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
- ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32)
+ ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+ ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]]
- ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]]
- ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]]
- ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]]
- ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]]
- ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]]
- ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]]
- ; GFX6: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[AND4]]
- ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[AND4]]
- ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[SUB7]], [[SUB6]]
- ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]]
- ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
- ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32)
- ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
- ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]]
+ ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+ ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]]
+ ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
+ ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
+ ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]]
+ ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]]
+ ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]]
+ ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]]
+ ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]]
+ ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
+ ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+ ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; GFX8-LABEL: name: test_urem_v2s16
; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+ ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
- ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
- ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32)
+ ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+ ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]]
- ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]]
- ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]]
- ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]]
- ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]]
- ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]]
- ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]]
- ; GFX8: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[AND4]]
- ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[AND4]]
- ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[SUB7]], [[SUB6]]
- ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]]
- ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
- ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32)
- ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
- ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]]
+ ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+ ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]]
+ ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
+ ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
+ ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]]
+ ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]]
+ ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]]
+ ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]]
+ ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]]
+ ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
+ ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+ ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+ ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; GFX9-LABEL: name: test_urem_v2s16
; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+ ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
- ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
- ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32)
+ ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+ ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
- ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
- ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]]
- ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]]
- ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
- ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
- ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]]
- ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
- ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
- ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]]
- ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]]
- ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]]
- ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]]
- ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]]
- ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]]
- ; GFX9: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
- ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[AND4]]
- ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[AND4]]
- ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[SUB7]], [[SUB6]]
- ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]]
- ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
- ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32)
+ ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]]
+ ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]]
+ ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]]
+ ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]]
+ ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]]
+ ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]]
+ ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]]
+ ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]]
+ ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]]
+ ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]]
+ ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]]
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
; GFX6: $vgpr0 = COPY [[COPY4]](s32)
; GFX8-LABEL: name: test_urem_s7
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
; GFX8: $vgpr0 = COPY [[COPY4]](s32)
; GFX9-LABEL: name: test_urem_s7
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
; GFX9: $vgpr0 = COPY [[COPY4]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
; GFX6: $vgpr0 = COPY [[COPY4]](s32)
; GFX8-LABEL: name: test_urem_s17
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
; GFX8: $vgpr0 = COPY [[COPY4]](s32)
; GFX9-LABEL: name: test_urem_s17
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
- ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]]
- ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]]
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
- ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
- ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
- ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
- ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
- ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]]
- ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]]
- ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]]
- ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
- ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]]
- ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]]
- ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
- ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]]
- ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]]
- ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]]
- ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]]
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32)
+ ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
+ ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
+ ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
+ ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]]
+ ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]]
+ ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32)
; GFX9: $vgpr0 = COPY [[COPY4]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v5, v4, v1
-; GISEL-NEXT: v_mul_hi_u32 v6, v4, v1
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v4
-; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v4, v5
-; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT: v_mul_lo_u32 v5, v4, v1
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
-; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1
-; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v6, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v4
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GISEL-NEXT: v_xor_b32_e32 v1, v2, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v1
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: s_ashr_i32 s3, s1, 31
; GISEL-NEXT: s_add_i32 s0, s0, s2
; GISEL-NEXT: s_add_i32 s1, s1, s3
-; GISEL-NEXT: s_xor_b32 s4, s0, s2
-; GISEL-NEXT: s_xor_b32 s5, s1, s3
-; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s5
+; GISEL-NEXT: s_xor_b32 s0, s0, s2
+; GISEL-NEXT: s_xor_b32 s4, s1, s3
+; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s4
+; GISEL-NEXT: s_sub_i32 s1, 0, s4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GISEL-NEXT: v_mul_lo_u32 v1, v0, s5
-; GISEL-NEXT: v_mul_hi_u32 v2, v0, s5
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GISEL-NEXT: v_mul_hi_u32 v1, v1, v0
-; GISEL-NEXT: v_add_i32_e64 v2, s[0:1], v0, v1
-; GISEL-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v1
+; GISEL-NEXT: v_mul_lo_u32 v1, s1, v0
+; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
+; GISEL-NEXT: v_mul_lo_u32 v1, v0, s4
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v0, s4
-; GISEL-NEXT: v_mul_lo_u32 v1, v0, s5
+; GISEL-NEXT: v_subrev_i32_e64 v2, s[0:1], s4, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0
-; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, s4, v1
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, s4, v1
-; GISEL-NEXT: v_cmp_le_u32_e64 s[0:1], s5, v4
-; GISEL-NEXT: s_and_b64 s[0:1], s[0:1], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: s_xor_b32 s0, s2, s3
; GISEL-NEXT: v_xor_b32_e32 v0, s0, v0
; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2
-; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3
-; GISEL-NEXT: v_mul_hi_u32 v11, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v12, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v10, v13, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v4, v6
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4
-; GISEL-NEXT: v_mul_lo_u32 v11, v5, v3
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v5
-; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 1, v5
-; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v6
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3
-; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7]
-; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v12, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v5
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v9
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_movk_i32 s6, 0x1000
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
-; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2
-; CHECK-NEXT: v_mul_hi_u32 v4, v2, s6
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0
+; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
+; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
+; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3
+; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v6
-; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; CHECK-NEXT: v_subrev_i32_e64 v3, s[4:5], s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8
+; GISEL-NEXT: s_sub_i32 s4, 0, s8
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v6, v5, s8
-; GISEL-NEXT: v_mul_hi_u32 v7, v5, s8
-; GISEL-NEXT: v_mul_lo_u32 v8, v4, s8
-; GISEL-NEXT: v_mul_hi_u32 v9, v4, s8
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v4
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v5, v6
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v4, v7
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v1
+; GISEL-NEXT: v_mul_lo_u32 v6, s4, v5
+; GISEL-NEXT: v_mul_lo_u32 v7, s4, v4
+; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
; GISEL-NEXT: v_mul_lo_u32 v6, v5, s8
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
-; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v5
-; GISEL-NEXT: v_mul_lo_u32 v9, v4, s8
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v4
-; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v4
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9
-; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v12
-; GISEL-NEXT: v_cmp_le_u32_e64 s[8:9], s8, v0
-; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v7, s[6:7]
-; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v10, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v8, v4, s8
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v6, s[4:5], s8, v0
+; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[4:5]
+; GISEL-NEXT: v_subrev_i32_e64 v7, s[6:7], s8, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v6, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: s_movk_i32 s4, 0x1000
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_mov_b32_e32 v3, 0x1000
-; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1
+; CGP-NEXT: s_mov_b32 s5, 0xfffff000
+; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
+; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT: v_cvt_f32_u32_e32 v5, s4
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
+; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6
-; CGP-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
-; CGP-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v5, s4
-; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v6
-; CGP-NEXT: v_mul_hi_u32 v10, v6, v3
-; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v7
-; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v9
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10
-; CGP-NEXT: v_cndmask_b32_e64 v8, v9, v12, s[4:5]
-; CGP-NEXT: v_mul_hi_u32 v7, v7, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v8, v6
-; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v5, v7
-; CGP-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v6, v8
-; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8
-; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; CGP-NEXT: v_mul_hi_u32 v5, v5, v0
-; CGP-NEXT: v_mul_hi_u32 v6, v6, v1
-; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v5
-; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v5
-; CGP-NEXT: v_lshlrev_b32_e32 v10, 12, v6
-; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v6
-; CGP-NEXT: v_subrev_i32_e32 v12, vcc, 1, v6
-; CGP-NEXT: v_sub_i32_e32 v13, vcc, v0, v7
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v10
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v10
-; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v3
-; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3
-; CGP-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v8, s[6:7]
-; CGP-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v1, v6, v11, s[6:7]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[4:5]
+; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT: v_mul_lo_u32 v8, s5, v6
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
+; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
+; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
+; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v6
+; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v6
+; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v4
+; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v4
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
+; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s4, v0
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
+; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
+; CGP-NEXT: v_sub_i32_e64 v8, s[6:7], v1, v3
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v6
+; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
+; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
ret <2 x i32> %result
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
-; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_lo_u32 v3, v2, s6
-; CHECK-NEXT: v_mul_hi_u32 v4, v2, s6
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0
+; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
+; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
+; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3
+; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v2, s6
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v6
-; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; CHECK-NEXT: v_subrev_i32_e64 v3, s[4:5], s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8
+; GISEL-NEXT: s_sub_i32 s4, 0, s8
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v6, v5, s8
-; GISEL-NEXT: v_mul_hi_u32 v7, v5, s8
-; GISEL-NEXT: v_mul_lo_u32 v8, v4, s8
-; GISEL-NEXT: v_mul_hi_u32 v9, v4, s8
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v4
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v5, v6
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v4, v7
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v1
+; GISEL-NEXT: v_mul_lo_u32 v6, s4, v5
+; GISEL-NEXT: v_mul_lo_u32 v7, s4, v4
+; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
; GISEL-NEXT: v_mul_lo_u32 v6, v5, s8
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
-; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v5
-; GISEL-NEXT: v_mul_lo_u32 v9, v4, s8
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v4
-; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v4
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9
-; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v12
-; GISEL-NEXT: v_cmp_le_u32_e64 s[8:9], s8, v0
-; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v7, s[6:7]
-; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v10, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v8, v4, s8
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v6, s[4:5], s8, v0
+; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[4:5]
+; GISEL-NEXT: v_subrev_i32_e64 v7, s[6:7], s8, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v6, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-LABEL: v_sdiv_v2i32_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: s_mov_b32 s8, 0x12d8fb
+; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_mov_b32_e32 v3, 0x12d8fb
-; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1
+; CGP-NEXT: s_mov_b32 s5, 0xffed2705
+; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705
+; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT: v_cvt_f32_u32_e32 v5, s8
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
+; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6
-; CGP-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
-; CGP-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT: v_mul_lo_u32 v7, v5, s8
-; CGP-NEXT: v_mul_hi_u32 v8, v5, s8
-; CGP-NEXT: v_mul_lo_u32 v9, v6, v3
-; CGP-NEXT: v_mul_hi_u32 v10, v6, v3
-; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v7
-; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v9
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10
-; CGP-NEXT: v_cndmask_b32_e64 v8, v9, v12, s[4:5]
-; CGP-NEXT: v_mul_hi_u32 v7, v7, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v8, v6
-; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v5, v7
-; CGP-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v6, v8
-; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8
-; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; CGP-NEXT: v_mul_hi_u32 v5, v5, v0
-; CGP-NEXT: v_mul_hi_u32 v6, v6, v1
-; CGP-NEXT: v_mul_lo_u32 v7, v5, s8
-; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v5
-; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v5
-; CGP-NEXT: v_mul_lo_u32 v10, v6, v3
-; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v6
-; CGP-NEXT: v_subrev_i32_e32 v12, vcc, 1, v6
-; CGP-NEXT: v_sub_i32_e32 v13, vcc, v0, v7
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v10
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v10
-; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v13
-; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3
-; CGP-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v8, s[6:7]
-; CGP-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v1, v6, v11, s[6:7]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[4:5]
+; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT: v_mul_lo_u32 v8, s5, v6
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
+; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
+; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
+; CGP-NEXT: v_mul_lo_u32 v7, v6, s4
+; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v6
+; CGP-NEXT: v_mul_lo_u32 v9, v4, v3
+; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v4
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
+; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s4, v0
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
+; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5]
+; CGP-NEXT: v_sub_i32_e64 v8, s[6:7], v1, v3
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v6
+; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
+; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = sdiv <2 x i32> %num, <i32 1235195, i32 1235195>
ret <2 x i32> %result
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3
; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v1
+; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
+; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
-; CHECK-NEXT: v_mul_lo_u32 v5, v4, v1
-; CHECK-NEXT: v_mul_hi_u32 v6, v4, v1
-; CHECK-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
-; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
-; CHECK-NEXT: v_mul_hi_u32 v5, v5, v4
-; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v4, v5
-; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v5
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CHECK-NEXT: v_mul_hi_u32 v4, v4, v0
+; CHECK-NEXT: v_mul_lo_u32 v5, v5, v4
+; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT: v_mul_hi_u32 v4, v0, v4
; CHECK-NEXT: v_mul_lo_u32 v5, v4, v1
; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v4
-; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 1, v4
-; CHECK-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1
-; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v6, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v4
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v6
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7
-; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6
-; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2
-; GISEL-NEXT: v_mul_hi_u32 v9, v6, v2
-; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3
-; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6
-; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v6, v8
-; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9
-; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v0
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
+; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
+; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2
; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v6
-; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, 1, v6
-; GISEL-NEXT: v_mul_lo_u32 v11, v7, v3
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v7
-; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 1, v7
-; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v8
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v8
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3
-; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v6, v9, s[6:7]
-; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v12, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v7
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc
+; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v2
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v9, s[6:7], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v7
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v9, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v5, v4, v1
-; GISEL-NEXT: v_mul_hi_u32 v6, v4, v1
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v4
-; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v4, v5
-; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT: v_mul_lo_u32 v5, v4, v1
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
-; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1
-; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v6, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v4
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GISEL-NEXT: v_xor_b32_e32 v1, v2, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v1
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2
-; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3
-; GISEL-NEXT: v_mul_hi_u32 v11, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v12, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v10, v13, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v4, v6
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4
-; GISEL-NEXT: v_mul_lo_u32 v11, v5, v3
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v5
-; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 1, v5
-; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v6
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3
-; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7]
-; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v12, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v5
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v9
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; CHECK-NEXT: s_cbranch_execz BB0_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v2
+; CHECK-NEXT: v_mov_b32_e32 v5, 0
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2
-; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1
+; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2
-; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v4, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc
-; CHECK-NEXT: v_mov_b32_e32 v5, 0
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc
; CHECK-NEXT: BB0_4:
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: v_mov_b32_e32 v0, v4
; CHECK-NEXT: s_cbranch_scc0 BB1_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
+; CHECK-NEXT: s_sub_i32 s0, 0, s4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
+; CHECK-NEXT: v_mul_lo_u32 v1, s0, v0
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0
; CHECK-NEXT: v_mul_lo_u32 v1, v0, s4
-; CHECK-NEXT: v_mul_hi_u32 v2, v0, s4
-; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
-; CHECK-NEXT: v_add_i32_e64 v2, s[0:1], v0, v1
-; CHECK-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v1
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
+; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s2, v1
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_mul_hi_u32 v0, v0, s2
-; CHECK-NEXT: v_mul_lo_u32 v1, v0, s4
+; CHECK-NEXT: v_subrev_i32_e64 v2, s[0:1], s4, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
-; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0
-; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s2, v1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, s2, v1
-; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s4, v4
-; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: BB1_4:
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_mov_b32 s1, s0
; CGP-NEXT: s_cbranch_execz BB2_4
; CGP-NEXT: ; %bb.3:
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
+; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
-; CGP-NEXT: v_mul_hi_u32 v5, v0, v4
-; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; CGP-NEXT: v_mul_hi_u32 v1, v1, v0
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v0, v8
-; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
+; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v1, v4
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0
-; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v0
-; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v1
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v1
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4
-; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: BB2_4:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: s_cbranch_execz BB2_8
; CGP-NEXT: ; %bb.7:
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v6
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v4, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_mul_lo_u32 v4, v3, v6
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3
-; CGP-NEXT: v_subrev_i32_e32 v7, vcc, 1, v3
-; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v4
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6
-; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CGP-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v2, vcc
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
+; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6
+; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v3
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
+; CGP-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
; CGP-NEXT: v_mov_b32_e32 v5, 0
; CGP-NEXT: BB2_8:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_cbranch_execz BB7_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4
+; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4
-; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1
-; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
+; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
-; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v4
-; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v3, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v0, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: BB7_4:
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: s_cbranch_execz BB8_4
; CGP-NEXT: ; %bb.3:
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v10
; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
+; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT: v_mul_hi_u32 v0, v5, v0
; CGP-NEXT: v_mul_lo_u32 v1, v0, v10
-; CGP-NEXT: v_mul_hi_u32 v4, v0, v10
-; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
-; CGP-NEXT: v_mul_hi_u32 v1, v1, v0
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v1
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v0, v5
-; CGP-NEXT: v_mul_lo_u32 v1, v0, v10
+; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v10
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0
-; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v0
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, v5, v1
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v1
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v10
-; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: BB8_4:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: s_cbranch_execz BB8_8
; CGP-NEXT: ; %bb.7:
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v8
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v8
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v8
-; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v4, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_mul_lo_u32 v4, v3, v8
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3
-; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v8
-; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CGP-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8
+; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v8
+; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v3
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8
+; CGP-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
; CGP-NEXT: v_mov_b32_e32 v5, 0
; CGP-NEXT: BB8_8:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: s_mov_b32 s4, 0xffffff
; GISEL-NEXT: v_and_b32_e32 v1, s4, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
-; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT: v_mul_lo_u32 v4, v3, v1
-; GISEL-NEXT: v_mul_hi_u32 v5, v3, v1
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v3
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_mul_hi_u32 v3, v3, v0
+; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
+; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v4, v1
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v1
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: s_setpc_b64 s[30:31]
define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
; GISEL-LABEL: s_srem_i32:
; GISEL: ; %bb.0:
-; GISEL-NEXT: s_ashr_i32 s4, s0, 31
-; GISEL-NEXT: s_ashr_i32 s2, s1, 31
-; GISEL-NEXT: s_add_i32 s0, s0, s4
-; GISEL-NEXT: s_add_i32 s1, s1, s2
-; GISEL-NEXT: s_xor_b32 s3, s0, s4
-; GISEL-NEXT: s_xor_b32 s2, s1, s2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s2
+; GISEL-NEXT: s_ashr_i32 s2, s0, 31
+; GISEL-NEXT: s_ashr_i32 s3, s1, 31
+; GISEL-NEXT: s_add_i32 s0, s0, s2
+; GISEL-NEXT: s_add_i32 s1, s1, s3
+; GISEL-NEXT: s_xor_b32 s0, s0, s2
+; GISEL-NEXT: s_xor_b32 s1, s1, s3
+; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1
+; GISEL-NEXT: s_sub_i32 s3, 0, s1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GISEL-NEXT: v_mul_lo_u32 v1, v0, s2
-; GISEL-NEXT: v_mul_hi_u32 v2, v0, s2
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GISEL-NEXT: v_mul_hi_u32 v1, v1, v0
-; GISEL-NEXT: v_add_i32_e64 v2, s[0:1], v0, v1
-; GISEL-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v1
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v0, s3
-; GISEL-NEXT: v_mul_lo_u32 v0, v0, s2
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s3, v0
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
-; GISEL-NEXT: v_add_i32_e64 v2, s[0:1], s2, v1
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[0:1], s3, v0
-; GISEL-NEXT: v_subrev_i32_e64 v0, s[2:3], s2, v1
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[0:1]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
-; GISEL-NEXT: v_xor_b32_e32 v0, s4, v0
-; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0
+; GISEL-NEXT: v_mul_lo_u32 v1, s3, v0
+; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
+; GISEL-NEXT: v_mul_lo_u32 v0, v0, s1
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
+; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GISEL-NEXT: v_xor_b32_e32 v0, s2, v0
+; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: ; return to shader part epilog
;
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
-; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
+; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_mul_lo_u32 v8, v5, v2
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v2
-; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3
-; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v8, v8, v5
-; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v5, v8
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v8
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9
-; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5
+; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
+; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
+; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
+; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
-; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
-; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v1, v7
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2
-; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v8, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3
-; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v9, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7
-; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CHECK-LABEL: v_srem_i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_movk_i32 s6, 0x1000
+; CHECK-NEXT: s_movk_i32 s4, 0x1000
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x1000
+; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6
+; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s4
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT: v_lshlrev_b32_e32 v4, 12, v3
-; CHECK-NEXT: v_mul_hi_u32 v5, v3, s6
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3
-; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v3
-; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v4
-; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v4, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v2
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3
+; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
+; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
-; GISEL-NEXT: s_add_i32 s10, 0x1000, 0
+; GISEL-NEXT: s_add_i32 s4, 0x1000, 0
; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s10
+; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s4
+; GISEL-NEXT: s_sub_i32 s5, 0, s4
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v6, v5, s10
-; GISEL-NEXT: v_mul_hi_u32 v7, v5, s10
-; GISEL-NEXT: v_mul_lo_u32 v8, v4, s10
-; GISEL-NEXT: v_mul_hi_u32 v9, v4, s10
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v4
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v5, v6
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v4, v7
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v1
-; GISEL-NEXT: v_mul_lo_u32 v5, v5, s10
-; GISEL-NEXT: v_mul_lo_u32 v4, v4, s10
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v5
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v4
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v6
-; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], s10, v6
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; GISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], s10, v6
-; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v7
-; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], s10, v7
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v4
-; GISEL-NEXT: v_subrev_i32_e64 v1, s[10:11], s10, v7
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; GISEL-NEXT: v_mul_lo_u32 v6, s5, v5
+; GISEL-NEXT: v_mul_lo_u32 v7, s5, v4
+; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, s4
+; GISEL-NEXT: v_mul_lo_u32 v4, v4, s4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
+; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
+; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: s_movk_i32 s4, 0x1000
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_mov_b32_e32 v3, 0x1000
-; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1
+; CGP-NEXT: s_mov_b32 s5, 0xfffff000
+; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
+; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT: v_cvt_f32_u32_e32 v5, s4
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
+; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6
-; CGP-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
-; CGP-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v5, s4
-; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v6
-; CGP-NEXT: v_mul_hi_u32 v10, v6, v3
-; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v7
-; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v9
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10
-; CGP-NEXT: v_cndmask_b32_e64 v8, v9, v12, s[4:5]
-; CGP-NEXT: v_mul_hi_u32 v7, v7, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v8, v6
-; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v5, v7
-; CGP-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v6, v8
-; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8
-; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; CGP-NEXT: v_mul_hi_u32 v5, v5, v0
-; CGP-NEXT: v_mul_hi_u32 v6, v6, v1
-; CGP-NEXT: v_lshlrev_b32_e32 v5, 12, v5
+; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT: v_mul_lo_u32 v8, s5, v6
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
+; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
+; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
; CGP-NEXT: v_lshlrev_b32_e32 v6, 12, v6
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, v0, v5
-; CGP-NEXT: v_sub_i32_e32 v8, vcc, v1, v6
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v3
-; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v7, v3
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v7, v3
-; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[8:9], v8, v3
-; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v6
-; CGP-NEXT: v_sub_i32_e64 v1, s[10:11], v8, v3
-; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
-; CGP-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v9, v0, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
+; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
+; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = srem <2 x i32> %num, <i32 4096, i32 4096>
ret <2 x i32> %result
; CHECK-LABEL: v_srem_i32_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
+; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb
+; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6
+; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s4
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT: v_mul_lo_u32 v4, v3, s6
-; CHECK-NEXT: v_mul_hi_u32 v5, v3, s6
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3
-; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT: v_mul_lo_u32 v3, v3, s6
-; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v4
-; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v4, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v2
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3
+; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
+; CHECK-NEXT: v_mul_lo_u32 v2, v2, s4
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
-; GISEL-NEXT: s_add_i32 s10, 0x12d8fb, 0
+; GISEL-NEXT: s_add_i32 s4, 0x12d8fb, 0
; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s10
+; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s4
+; GISEL-NEXT: s_sub_i32 s5, 0, s4
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v6, v5, s10
-; GISEL-NEXT: v_mul_hi_u32 v7, v5, s10
-; GISEL-NEXT: v_mul_lo_u32 v8, v4, s10
-; GISEL-NEXT: v_mul_hi_u32 v9, v4, s10
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v4
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v5, v6
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v4, v7
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v1
-; GISEL-NEXT: v_mul_lo_u32 v5, v5, s10
-; GISEL-NEXT: v_mul_lo_u32 v4, v4, s10
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v5
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v4
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v6
-; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], s10, v6
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; GISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], s10, v6
-; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v7
-; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], s10, v7
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v4
-; GISEL-NEXT: v_subrev_i32_e64 v1, s[10:11], s10, v7
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; GISEL-NEXT: v_mul_lo_u32 v6, s5, v5
+; GISEL-NEXT: v_mul_lo_u32 v7, s5, v4
+; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, s4
+; GISEL-NEXT: v_mul_lo_u32 v4, v4, s4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
+; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
+; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-LABEL: v_srem_v2i32_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: s_mov_b32 s8, 0x12d8fb
+; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_mov_b32_e32 v3, 0x12d8fb
-; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1
+; CGP-NEXT: s_mov_b32 s5, 0xffed2705
+; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705
+; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT: v_cvt_f32_u32_e32 v5, s8
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
+; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6
-; CGP-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
-; CGP-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT: v_mul_lo_u32 v7, v5, s8
-; CGP-NEXT: v_mul_hi_u32 v8, v5, s8
-; CGP-NEXT: v_mul_lo_u32 v9, v6, v3
-; CGP-NEXT: v_mul_hi_u32 v10, v6, v3
-; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v7
-; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v9
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10
-; CGP-NEXT: v_cndmask_b32_e64 v8, v9, v12, s[4:5]
-; CGP-NEXT: v_mul_hi_u32 v7, v7, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v8, v6
-; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v5, v7
-; CGP-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v6, v8
-; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8
-; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; CGP-NEXT: v_mul_hi_u32 v5, v5, v0
-; CGP-NEXT: v_mul_hi_u32 v6, v6, v1
-; CGP-NEXT: v_mul_lo_u32 v5, v5, s8
-; CGP-NEXT: v_mul_lo_u32 v6, v6, v3
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, v0, v5
-; CGP-NEXT: v_sub_i32_e32 v8, vcc, v1, v6
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v7
-; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v7, v3
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v7, v3
-; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[8:9], v8, v3
-; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v6
-; CGP-NEXT: v_sub_i32_e64 v1, s[10:11], v8, v3
-; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
-; CGP-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v9, v0, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT: v_mul_lo_u32 v8, s5, v6
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
+; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
+; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
+; CGP-NEXT: v_mul_lo_u32 v6, v6, s4
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
+; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
+; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
ret <2 x i32> %result
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3
; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1
+; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT: v_mul_lo_u32 v4, v3, v1
-; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3
-; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0
+; CHECK-NEXT: v_mul_lo_u32 v4, v4, v3
+; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT: v_mul_hi_u32 v3, v0, v3
; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1
-; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1
-; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v4, v1
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v1
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v6
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7
-; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6
-; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2
-; GISEL-NEXT: v_mul_hi_u32 v9, v6, v2
-; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3
-; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6
-; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v6, v8
-; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9
-; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v0
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
+; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
+; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT: v_mul_lo_u32 v6, v6, v2
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
-; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v6
-; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v1, v7
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2
-; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v8, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3
-; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v9, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7
-; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT: v_mul_lo_u32 v4, v3, v1
-; GISEL-NEXT: v_mul_hi_u32 v5, v3, v1
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v3
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_mul_hi_u32 v3, v3, v0
+; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
+; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v4, v1
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v1
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
-; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
+; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_mul_lo_u32 v8, v5, v2
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v2
-; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3
-; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v8, v8, v5
-; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v5, v8
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v8
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9
-; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5
+; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
+; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
+; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
+; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
-; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
-; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v1, v7
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2
-; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v8, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3
-; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v9, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7
-; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v3, v7
; CHECK-NEXT: v_subb_u32_e32 v5, vcc, v1, v7, vcc
; CHECK-NEXT: BB0_2: ; %Flow
-; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_cbranch_execz BB0_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v2
+; CHECK-NEXT: v_mov_b32_e32 v5, 0
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2
-; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1
-; CHECK-NEXT: v_mov_b32_e32 v5, 0
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1
+; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
-; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v2
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v0, s[4:5]
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc
; CHECK-NEXT: BB0_4:
-; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v0, v4
; CHECK-NEXT: v_mov_b32_e32 v1, v5
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: s_cbranch_scc0 BB1_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
+; CHECK-NEXT: s_sub_i32 s0, 0, s4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
-; CHECK-NEXT: v_mul_lo_u32 v1, v0, s4
-; CHECK-NEXT: v_mul_hi_u32 v2, v0, s4
-; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
-; CHECK-NEXT: v_add_i32_e64 v2, s[0:1], v0, v1
-; CHECK-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_mul_hi_u32 v0, v0, s2
+; CHECK-NEXT: v_mul_lo_u32 v1, s0, v0
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0
; CHECK-NEXT: v_mul_lo_u32 v0, v0, s4
-; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s2, v0
-; CHECK-NEXT: v_add_i32_e64 v2, s[0:1], s4, v1
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[0:1], s2, v0
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
-; CHECK-NEXT: v_subrev_i32_e64 v0, s[2:3], s4, v1
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
+; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: BB1_4:
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_mov_b32 s1, s0
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v11
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v5, v11, vcc
; CGP-NEXT: BB2_2: ; %Flow2
-; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
; CGP-NEXT: s_cbranch_execz BB2_4
; CGP-NEXT: ; %bb.3:
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
-; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
-; CGP-NEXT: v_mul_hi_u32 v5, v0, v4
-; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; CGP-NEXT: v_mul_hi_u32 v1, v1, v0
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v0, v8
+; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
+; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
; CGP-NEXT: v_mul_lo_u32 v0, v0, v4
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v0
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v1, v4
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v0
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v4
-; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v8, v0
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: BB2_4:
-; CGP-NEXT: s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
; CGP-NEXT: v_or_b32_e32 v5, v3, v7
; CGP-NEXT: v_mov_b32_e32 v4, 0
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v9
; CGP-NEXT: v_subb_u32_e32 v5, vcc, v3, v9, vcc
; CGP-NEXT: BB2_6: ; %Flow
-; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
; CGP-NEXT: s_cbranch_execz BB2_8
; CGP-NEXT: ; %bb.7:
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6
+; CGP-NEXT: v_mov_b32_e32 v5, 0
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v6
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v4, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_mul_lo_u32 v3, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v6
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
-; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v6
-; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v5, 0
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
+; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
; CGP-NEXT: BB2_8:
-; CGP-NEXT: s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
; CGP-NEXT: v_mov_b32_e32 v2, v4
; CGP-NEXT: v_mov_b32_e32 v3, v5
; CGP-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
; CHECK-NEXT: v_subb_u32_e32 v3, vcc, v1, v7, vcc
; CHECK-NEXT: BB7_2: ; %Flow
-; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_cbranch_execz BB7_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4
+; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v4
+; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4
-; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1
-; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
+; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v4
-; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
-; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v2, v4
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v2, v4
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v0, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v3, 0
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
; CHECK-NEXT: BB7_4:
-; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v0, v2
; CHECK-NEXT: v_mov_b32_e32 v1, v3
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v11
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v4, v11, vcc
; CGP-NEXT: BB8_2: ; %Flow2
-; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
; CGP-NEXT: s_cbranch_execz BB8_4
; CGP-NEXT: ; %bb.3:
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v10
; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
-; CGP-NEXT: v_mul_lo_u32 v1, v0, v10
-; CGP-NEXT: v_mul_hi_u32 v4, v0, v10
-; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
-; CGP-NEXT: v_mul_hi_u32 v1, v1, v0
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v0, v5
+; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
+; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT: v_mul_hi_u32 v0, v5, v0
; CGP-NEXT: v_mul_lo_u32 v0, v0, v10
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v0
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v1, v10
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v0
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10
-; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v10
-; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v5, v0
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v10
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v10
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: BB8_4:
-; CGP-NEXT: s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
; CGP-NEXT: v_or_b32_e32 v5, v3, v9
; CGP-NEXT: v_mov_b32_e32 v4, 0
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v9
; CGP-NEXT: v_subb_u32_e32 v5, vcc, v3, v9, vcc
; CGP-NEXT: BB8_6: ; %Flow
-; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
; CGP-NEXT: s_cbranch_execz BB8_8
; CGP-NEXT: ; %bb.7:
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v8
+; CGP-NEXT: v_mov_b32_e32 v5, 0
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v8
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v8
-; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v4, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_mul_lo_u32 v3, v3, v8
-; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v8
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v8
-; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v8
-; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v5, 0
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v8
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v8
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8
+; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
; CGP-NEXT: BB8_8:
-; CGP-NEXT: s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
; CGP-NEXT: v_mov_b32_e32 v2, v4
; CGP-NEXT: v_mov_b32_e32 v3, v5
; CGP-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: s_mov_b32 s4, 0xffffff
; GISEL-NEXT: v_and_b32_e32 v1, s4, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
-; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
-; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1
-; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_i32:
; GISEL-LABEL: s_udiv_i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1
+; GISEL-NEXT: s_sub_i32 s2, 0, s1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GISEL-NEXT: v_mul_lo_u32 v1, s2, v0
+; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
; GISEL-NEXT: v_mul_lo_u32 v1, v0, s1
-; GISEL-NEXT: v_mul_hi_u32 v2, v0, s1
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GISEL-NEXT: v_mul_hi_u32 v1, v1, v0
-; GISEL-NEXT: v_add_i32_e64 v2, s[2:3], v0, v1
-; GISEL-NEXT: v_sub_i32_e64 v0, s[2:3], v0, v1
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v0, s0
-; GISEL-NEXT: v_mul_lo_u32 v1, v0, s1
+; GISEL-NEXT: v_subrev_i32_e64 v2, s[2:3], s1, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0
-; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, s0, v1
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, s0, v1
-; GISEL-NEXT: v_cmp_le_u32_e64 s[0:1], s1, v4
-; GISEL-NEXT: s_and_b64 s[0:1], s[0:1], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: ; return to shader part epilog
;
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2
-; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v4
-; GISEL-NEXT: v_mul_lo_u32 v9, v5, v3
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v5
-; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v5
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3
-; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7]
-; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v10, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v5
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i32:
; CHECK-LABEL: v_udiv_i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_movk_i32 s4, 0x1000
-; CHECK-NEXT: v_mov_b32_e32 v1, 0x1000
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4
+; CHECK-NEXT: s_movk_i32 s6, 0x1000
+; CHECK-NEXT: v_mov_b32_e32 v1, 0xfffff000
+; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2
-; CHECK-NEXT: v_mul_hi_u32 v4, v2, s4
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0
-; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1
-; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
+; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
+; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
+; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v1
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT: v_subrev_i32_e64 v2, s[4:5], s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = udiv i32 %num, 4096
ret i32 %result
}
define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) {
-; CHECK-LABEL: v_udiv_v2i32_pow2k_denom:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_movk_i32 s4, 0x1000
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x1000
-; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s4
-; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
-; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
-; CHECK-NEXT: v_lshlrev_b32_e32 v5, 12, v3
-; CHECK-NEXT: v_mul_hi_u32 v6, v3, s4
-; CHECK-NEXT: v_lshlrev_b32_e32 v7, 12, v4
-; CHECK-NEXT: v_mul_hi_u32 v8, v4, v2
-; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v5
-; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
-; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
-; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v6, v7, v10, s[4:5]
-; CHECK-NEXT: v_mul_hi_u32 v5, v5, v3
-; CHECK-NEXT: v_mul_hi_u32 v6, v6, v4
-; CHECK-NEXT: v_add_i32_e64 v7, s[6:7], v3, v5
-; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v5
-; CHECK-NEXT: v_add_i32_e64 v5, s[6:7], v4, v6
-; CHECK-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5]
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT: v_mul_hi_u32 v4, v4, v1
-; CHECK-NEXT: v_lshlrev_b32_e32 v5, 12, v3
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v3
-; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 1, v3
-; CHECK-NEXT: v_lshlrev_b32_e32 v8, 12, v4
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v4
-; CHECK-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4
-; CHECK-NEXT: v_sub_i32_e32 v11, vcc, v0, v5
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v8
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v8
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v2
-; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v3, v6, s[6:7]
-; CHECK-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e64 v1, v4, v9, s[6:7]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v1, v10, v1, s[4:5]
-; CHECK-NEXT: s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_udiv_v2i32_pow2k_denom:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_movk_i32 s8, 0x1000
+; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s8
+; GISEL-NEXT: s_sub_i32 s4, 0, s8
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
+; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GISEL-NEXT: v_mul_lo_u32 v4, s4, v3
+; GISEL-NEXT: v_mul_lo_u32 v5, s4, v2
+; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v2, v5
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
+; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
+; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2
+; GISEL-NEXT: v_lshlrev_b32_e32 v4, 12, v3
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v3
+; GISEL-NEXT: v_lshlrev_b32_e32 v6, 12, v2
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v4, s[4:5], s8, v0
+; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[4:5]
+; GISEL-NEXT: v_subrev_i32_e64 v5, s[6:7], s8, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v2
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_udiv_v2i32_pow2k_denom:
+; CGP: ; %bb.0:
+; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT: s_movk_i32 s4, 0x1000
+; CGP-NEXT: v_mov_b32_e32 v2, 0x1000
+; CGP-NEXT: s_mov_b32 s5, 0xfffff000
+; CGP-NEXT: v_mov_b32_e32 v3, 0xfffff000
+; CGP-NEXT: v_cvt_f32_u32_e32 v4, s4
+; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2
+; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
+; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
+; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_mul_lo_u32 v6, s5, v4
+; CGP-NEXT: v_mul_lo_u32 v3, v3, v5
+; CGP-NEXT: v_mul_hi_u32 v6, v4, v6
+; CGP-NEXT: v_mul_hi_u32 v3, v5, v3
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
+; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
+; CGP-NEXT: v_lshlrev_b32_e32 v5, 12, v4
+; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4
+; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v3
+; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s4, v0
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
+; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v8, s[4:5]
+; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v2
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v4
+; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v3
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
+; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
+; CGP-NEXT: s_setpc_b64 s[30:31]
%result = udiv <2 x i32> %num, <i32 4096, i32 4096>
ret <2 x i32> %result
}
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s6
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
-; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6
-; CHECK-NEXT: v_mul_hi_u32 v3, v1, s6
-; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v2
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1
-; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT: v_mov_b32_e32 v1, 0xffed2705
+; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
+; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
+; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
-; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, 1, v1
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v5
-; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v3, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT: v_subrev_i32_e64 v2, s[4:5], s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = udiv i32 %num, 1235195
ret i32 %result
}
define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) {
-; CHECK-LABEL: v_udiv_v2i32_oddk_denom:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s8, 0x12d8fb
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb
-; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s8
-; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
-; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
-; CHECK-NEXT: v_mul_lo_u32 v5, v3, s8
-; CHECK-NEXT: v_mul_hi_u32 v6, v3, s8
-; CHECK-NEXT: v_mul_lo_u32 v7, v4, v2
-; CHECK-NEXT: v_mul_hi_u32 v8, v4, v2
-; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v5
-; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
-; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
-; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v6, v7, v10, s[4:5]
-; CHECK-NEXT: v_mul_hi_u32 v5, v5, v3
-; CHECK-NEXT: v_mul_hi_u32 v6, v6, v4
-; CHECK-NEXT: v_add_i32_e64 v7, s[6:7], v3, v5
-; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v5
-; CHECK-NEXT: v_add_i32_e64 v5, s[6:7], v4, v6
-; CHECK-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5]
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT: v_mul_hi_u32 v4, v4, v1
-; CHECK-NEXT: v_mul_lo_u32 v5, v3, s8
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v3
-; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 1, v3
-; CHECK-NEXT: v_mul_lo_u32 v8, v4, v2
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v4
-; CHECK-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4
-; CHECK-NEXT: v_sub_i32_e32 v11, vcc, v0, v5
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v8
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v8
-; CHECK-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v11
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v2
-; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v3, v6, s[6:7]
-; CHECK-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e64 v1, v4, v9, s[6:7]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v1, v10, v1, s[4:5]
-; CHECK-NEXT: s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_udiv_v2i32_oddk_denom:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_mov_b32 s8, 0x12d8fb
+; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s8
+; GISEL-NEXT: s_sub_i32 s4, 0, s8
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
+; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GISEL-NEXT: v_mul_lo_u32 v4, s4, v3
+; GISEL-NEXT: v_mul_lo_u32 v5, s4, v2
+; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v2, v5
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
+; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
+; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2
+; GISEL-NEXT: v_mul_lo_u32 v4, v3, s8
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v3
+; GISEL-NEXT: v_mul_lo_u32 v6, v2, s8
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v4, s[4:5], s8, v0
+; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[4:5]
+; GISEL-NEXT: v_subrev_i32_e64 v5, s[6:7], s8, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v2
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_udiv_v2i32_oddk_denom:
+; CGP: ; %bb.0:
+; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
+; CGP-NEXT: v_mov_b32_e32 v2, 0x12d8fb
+; CGP-NEXT: s_mov_b32 s5, 0xffed2705
+; CGP-NEXT: v_cvt_f32_u32_e32 v3, s4
+; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
+; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
+; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT: v_mul_lo_u32 v5, s5, v3
+; CGP-NEXT: v_mul_lo_u32 v6, s5, v4
+; CGP-NEXT: v_mul_hi_u32 v5, v3, v5
+; CGP-NEXT: v_mul_hi_u32 v6, v4, v6
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
+; CGP-NEXT: v_mul_lo_u32 v5, v3, s4
+; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v3
+; CGP-NEXT: v_mul_lo_u32 v7, v4, v2
+; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s4, v0
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
+; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
+; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v2
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3
+; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; CGP-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
+; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc
+; CGP-NEXT: s_setpc_b64 s[30:31]
%result = udiv <2 x i32> %num, <i32 1235195, i32 1235195>
ret <2 x i32> %result
}
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v1
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_lo_u32 v3, v2, v1
-; CHECK-NEXT: v_mul_hi_u32 v4, v2, v1
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0
+; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v2, v1
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1
-; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%shl.y = shl i32 4096, %y
%r = udiv i32 %x, %shl.y
; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2
; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2
-; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v4
-; GISEL-NEXT: v_mul_lo_u32 v9, v5, v3
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v5
-; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v5
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3
-; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7]
-; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v10, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v5
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i32_pow2_shl_denom:
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GISEL-NEXT: v_and_b32_e32 v1, s4, v1
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
-; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1
-; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_i32_24bit:
; GISEL-NEXT: v_and_b32_e32 v2, s4, v2
; GISEL-NEXT: v_and_b32_e32 v3, s4, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2
-; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v4
-; GISEL-NEXT: v_mul_lo_u32 v9, v5, v3
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v5
-; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v5
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3
-; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7]
-; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v10, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v5
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i32_24bit:
; CHECK-NEXT: s_cbranch_execz BB0_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v2
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2
-; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1
+; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2
-; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v4, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc
; CHECK-NEXT: v_mov_b32_e32 v5, 0
; CHECK-NEXT: BB0_4:
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_cbranch_scc0 BB1_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
+; CHECK-NEXT: s_sub_i32 s1, 0, s2
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
+; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0
; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2
-; CHECK-NEXT: v_mul_hi_u32 v2, v0, s2
-; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
-; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v0, v1
-; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
+; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_mul_hi_u32 v0, v0, s0
-; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2
+; CHECK-NEXT: v_subrev_i32_e64 v2, s[0:1], s2, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
-; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0
-; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, s0, v1
-; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v4
-; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: BB1_4:
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_mov_b32 s1, s0
; CGP-NEXT: s_cbranch_execz BB2_4
; CGP-NEXT: ; %bb.3:
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
+; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
-; CGP-NEXT: v_mul_hi_u32 v5, v0, v4
-; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; CGP-NEXT: v_mul_hi_u32 v1, v1, v0
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v0, v8
-; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
+; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v1, v4
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0
-; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v0
-; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v1
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v1
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4
-; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: BB2_4:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: s_cbranch_execz BB2_8
; CGP-NEXT: ; %bb.7:
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v6
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v4, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_mul_lo_u32 v4, v3, v6
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3
-; CGP-NEXT: v_subrev_i32_e32 v7, vcc, 1, v3
-; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v4
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6
-; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CGP-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v2, vcc
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
+; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v3
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
+; CGP-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
; CGP-NEXT: v_mov_b32_e32 v5, 0
; CGP-NEXT: BB2_8:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_cbranch_execz BB7_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4
+; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4
-; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1
-; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
+; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
-; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v4
-; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v3, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v0, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: BB7_4:
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: s_cbranch_execz BB8_4
; CGP-NEXT: ; %bb.3:
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v10
; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
+; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT: v_mul_hi_u32 v0, v5, v0
; CGP-NEXT: v_mul_lo_u32 v1, v0, v10
-; CGP-NEXT: v_mul_hi_u32 v4, v0, v10
-; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
-; CGP-NEXT: v_mul_hi_u32 v1, v1, v0
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v1
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v0, v5
-; CGP-NEXT: v_mul_lo_u32 v1, v0, v10
+; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v10
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0
-; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v0
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, v5, v1
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v1
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v10
-; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: BB8_4:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: s_cbranch_execz BB8_8
; CGP-NEXT: ; %bb.7:
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v8
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v8
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v8
-; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v4, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_mul_lo_u32 v4, v3, v8
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3
-; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v8
-; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; CGP-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8
+; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v8
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v3
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8
+; CGP-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc
; CGP-NEXT: v_mov_b32_e32 v5, 0
; CGP-NEXT: BB8_8:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GISEL-NEXT: v_and_b32_e32 v1, s4, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
-; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1
-; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
-; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_i32:
; GISEL-LABEL: s_urem_i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1
+; GISEL-NEXT: s_sub_i32 s2, 0, s1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GISEL-NEXT: v_mul_lo_u32 v1, v0, s1
-; GISEL-NEXT: v_mul_hi_u32 v2, v0, s1
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GISEL-NEXT: v_mul_hi_u32 v1, v1, v0
-; GISEL-NEXT: v_add_i32_e64 v2, s[2:3], v0, v1
-; GISEL-NEXT: v_sub_i32_e64 v0, s[2:3], v0, v1
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v0, s0
+; GISEL-NEXT: v_mul_lo_u32 v1, s2, v0
+; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
; GISEL-NEXT: v_mul_lo_u32 v0, v0, s1
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s0, v0
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v1
-; GISEL-NEXT: v_add_i32_e64 v2, s[2:3], s1, v1
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], s0, v0
-; GISEL-NEXT: v_subrev_i32_e64 v0, s[2:3], s1, v1
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
+; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: ; return to shader part epilog
;
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT: s_mov_b32 s4, 0x4f7ffffe
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_f32_e32 v4, s4, v4
+; GISEL-NEXT: v_mul_f32_e32 v6, s4, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2
-; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v4
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v5
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2
-; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v6, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3
-; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v7, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5
-; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v7, v3
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_movk_i32 s4, 0x1000
-; CHECK-NEXT: v_mov_b32_e32 v1, 0x1000
+; CHECK-NEXT: v_mov_b32_e32 v1, 0xfffff000
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2
-; CHECK-NEXT: v_mul_hi_u32 v4, v2, s4
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0
-; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2
-; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
+; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
+; CHECK-NEXT: v_lshlrev_b32_e32 v1, 12, v1
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = urem i32 %num, 4096
ret i32 %result
}
define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) {
-; CHECK-LABEL: v_urem_v2i32_pow2k_denom:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_movk_i32 s4, 0x1000
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x1000
-; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s4
-; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
-; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
-; CHECK-NEXT: v_lshlrev_b32_e32 v5, 12, v3
-; CHECK-NEXT: v_mul_hi_u32 v6, v3, s4
-; CHECK-NEXT: v_lshlrev_b32_e32 v7, 12, v4
-; CHECK-NEXT: v_mul_hi_u32 v8, v4, v2
-; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v5
-; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
-; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
-; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v6, v7, v10, s[4:5]
-; CHECK-NEXT: v_mul_hi_u32 v5, v5, v3
-; CHECK-NEXT: v_mul_hi_u32 v6, v6, v4
-; CHECK-NEXT: v_add_i32_e64 v7, s[6:7], v3, v5
-; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v5
-; CHECK-NEXT: v_add_i32_e64 v5, s[6:7], v4, v6
-; CHECK-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5]
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT: v_mul_hi_u32 v4, v4, v1
-; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v3
-; CHECK-NEXT: v_lshlrev_b32_e32 v4, 12, v4
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v3
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v1, v4
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v2
-; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v5, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v5, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v6, v2
-; CHECK-NEXT: v_add_i32_e64 v3, s[8:9], v6, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v4
-; CHECK-NEXT: v_sub_i32_e64 v1, s[10:11], v6, v2
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
-; CHECK-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v7, v0, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[8:9]
-; CHECK-NEXT: s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_urem_v2i32_pow2k_denom:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_movk_i32 s4, 0x1000
+; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s4
+; GISEL-NEXT: s_sub_i32 s5, 0, s4
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GISEL-NEXT: v_mul_lo_u32 v3, s5, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v3, v0, v2
+; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2
+; GISEL-NEXT: v_lshlrev_b32_e32 v3, 12, v3
+; GISEL-NEXT: v_lshlrev_b32_e32 v2, 12, v2
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
+; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
+; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_urem_v2i32_pow2k_denom:
+; CGP: ; %bb.0:
+; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT: s_movk_i32 s4, 0x1000
+; CGP-NEXT: v_mov_b32_e32 v2, 0x1000
+; CGP-NEXT: s_mov_b32 s5, 0x4f7ffffe
+; CGP-NEXT: s_mov_b32 s6, 0xfffff000
+; CGP-NEXT: v_cvt_f32_u32_e32 v3, s4
+; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
+; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT: v_mul_f32_e32 v3, s5, v3
+; CGP-NEXT: v_mul_f32_e32 v4, s5, v4
+; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT: v_mul_lo_u32 v5, s6, v3
+; CGP-NEXT: v_mul_lo_u32 v6, s6, v4
+; CGP-NEXT: v_mul_hi_u32 v5, v3, v5
+; CGP-NEXT: v_mul_hi_u32 v6, v4, v6
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
+; CGP-NEXT: v_lshlrev_b32_e32 v3, 12, v3
+; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT: s_setpc_b64 s[30:31]
%result = urem <2 x i32> %num, <i32 4096, i32 4096>
ret <2 x i32> %result
}
; CHECK-LABEL: v_urem_i32_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT: v_mov_b32_e32 v1, 0x12d8fb
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
+; CHECK-NEXT: v_mov_b32_e32 v1, 0xffed2705
+; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_lo_u32 v3, v2, s6
-; CHECK-NEXT: v_mul_hi_u32 v4, v2, s6
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0
-; CHECK-NEXT: v_mul_lo_u32 v2, v2, s6
-; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v3
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
+; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
+; CHECK-NEXT: v_mul_lo_u32 v1, v1, s4
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = urem i32 %num, 1235195
ret i32 %result
}
define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
-; CHECK-LABEL: v_urem_v2i32_oddk_denom:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s8, 0x12d8fb
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb
-; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s8
-; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
-; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
-; CHECK-NEXT: v_mul_lo_u32 v5, v3, s8
-; CHECK-NEXT: v_mul_hi_u32 v6, v3, s8
-; CHECK-NEXT: v_mul_lo_u32 v7, v4, v2
-; CHECK-NEXT: v_mul_hi_u32 v8, v4, v2
-; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v5
-; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
-; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
-; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v6, v7, v10, s[4:5]
-; CHECK-NEXT: v_mul_hi_u32 v5, v5, v3
-; CHECK-NEXT: v_mul_hi_u32 v6, v6, v4
-; CHECK-NEXT: v_add_i32_e64 v7, s[6:7], v3, v5
-; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v5
-; CHECK-NEXT: v_add_i32_e64 v5, s[6:7], v4, v6
-; CHECK-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5]
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT: v_mul_hi_u32 v4, v4, v1
-; CHECK-NEXT: v_mul_lo_u32 v3, v3, s8
-; CHECK-NEXT: v_mul_lo_u32 v4, v4, v2
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v3
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v1, v4
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v5
-; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v5, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v5, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v6, v2
-; CHECK-NEXT: v_add_i32_e64 v3, s[8:9], v6, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v4
-; CHECK-NEXT: v_sub_i32_e64 v1, s[10:11], v6, v2
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
-; CHECK-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v7, v0, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[8:9]
-; CHECK-NEXT: s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_urem_v2i32_oddk_denom:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_mov_b32 s4, 0x12d8fb
+; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s4
+; GISEL-NEXT: s_sub_i32 s5, 0, s4
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GISEL-NEXT: v_mul_lo_u32 v3, s5, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v3, v0, v2
+; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2
+; GISEL-NEXT: v_mul_lo_u32 v3, v3, s4
+; GISEL-NEXT: v_mul_lo_u32 v2, v2, s4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
+; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
+; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_urem_v2i32_oddk_denom:
+; CGP: ; %bb.0:
+; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
+; CGP-NEXT: v_mov_b32_e32 v2, 0x12d8fb
+; CGP-NEXT: s_mov_b32 s5, 0xffed2705
+; CGP-NEXT: v_cvt_f32_u32_e32 v3, s4
+; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
+; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT: v_mul_lo_u32 v4, s5, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
+; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
+; CGP-NEXT: v_mul_lo_u32 v4, v4, s4
+; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
+; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2
+; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT: s_setpc_b64 s[30:31]
%result = urem <2 x i32> %num, <i32 1235195, i32 1235195>
ret <2 x i32> %result
}
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v1
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_lo_u32 v3, v2, v1
-; CHECK-NEXT: v_mul_hi_u32 v4, v2, v1
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0
+; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
-; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%shl.y = shl i32 4096, %y
%r = urem i32 %x, %shl.y
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_movk_i32 s4, 0x1000
+; GISEL-NEXT: s_mov_b32 s5, 0x4f7ffffe
; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2
; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
+; GISEL-NEXT: v_mul_f32_e32 v6, s5, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2
-; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v4
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v5
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2
-; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v6, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3
-; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v7, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5
-; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v7, v3
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i32_pow2_shl_denom:
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GISEL-NEXT: v_and_b32_e32 v1, s4, v1
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
-; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_i32_24bit:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0xffffff
+; GISEL-NEXT: s_mov_b32 s5, 0x4f7ffffe
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GISEL-NEXT: v_and_b32_e32 v1, s4, v1
; GISEL-NEXT: v_and_b32_e32 v2, s4, v2
; GISEL-NEXT: v_and_b32_e32 v3, s4, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
+; GISEL-NEXT: v_mul_f32_e32 v6, s5, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2
-; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6
-; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7
-; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v4
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v5
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2
-; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v6, v2
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3
-; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v7, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5
-; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v7, v3
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
-; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i32_24bit:
; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v1, vcc
; CHECK-NEXT: BB0_2: ; %Flow
-; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_cbranch_execz BB0_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v2
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2
-; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1
+; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
-; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
-; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v2
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v2
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v0, s[4:5]
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc
; CHECK-NEXT: v_mov_b32_e32 v5, 0
; CHECK-NEXT: BB0_4:
-; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v0, v4
; CHECK-NEXT: v_mov_b32_e32 v1, v5
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: s_cbranch_scc0 BB1_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
+; CHECK-NEXT: s_sub_i32 s1, 0, s2
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
-; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2
-; CHECK-NEXT: v_mul_hi_u32 v2, v0, s2
-; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
-; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v0, v1
-; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_mul_hi_u32 v0, v0, s0
+; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0
; CHECK-NEXT: v_mul_lo_u32 v0, v0, s2
-; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s0, v0
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
-; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], s2, v1
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[0:1], s0, v0
-; CHECK-NEXT: v_subrev_i32_e64 v0, s[2:3], s2, v1
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
+; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0
+; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: BB1_4:
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_mov_b32 s1, s0
; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc
; CGP-NEXT: BB2_2: ; %Flow2
-; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
; CGP-NEXT: s_cbranch_execz BB2_4
; CGP-NEXT: ; %bb.3:
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
-; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
-; CGP-NEXT: v_mul_hi_u32 v5, v0, v4
-; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
-; CGP-NEXT: v_mul_hi_u32 v1, v1, v0
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v0, v8
+; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
+; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
; CGP-NEXT: v_mul_lo_u32 v0, v0, v4
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v0
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v1, v4
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v0
-; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v4
-; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v8, v0
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: BB2_4:
-; CGP-NEXT: s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
; CGP-NEXT: v_or_b32_e32 v5, v3, v7
; CGP-NEXT: v_mov_b32_e32 v4, 0
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
; CGP-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v3, vcc
; CGP-NEXT: BB2_6: ; %Flow
-; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
; CGP-NEXT: s_cbranch_execz BB2_8
; CGP-NEXT: ; %bb.7:
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v6
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v4, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_mul_lo_u32 v3, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v6
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3
-; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v6
-; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
+; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
; CGP-NEXT: v_mov_b32_e32 v5, 0
; CGP-NEXT: BB2_8:
-; CGP-NEXT: s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
; CGP-NEXT: v_mov_b32_e32 v2, v4
; CGP-NEXT: v_mov_b32_e32 v3, v5
; CGP-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v1, vcc
; CHECK-NEXT: BB7_2: ; %Flow
-; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_cbranch_execz BB7_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4
+; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4
-; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1
-; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
+; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v4
-; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
-; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v2, v4
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1
-; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v2, v4
-; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v0, s[4:5]
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: BB7_4:
-; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v0, v2
; CHECK-NEXT: v_mov_b32_e32 v1, v3
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc
; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v11, vcc
; CGP-NEXT: BB8_2: ; %Flow2
-; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
; CGP-NEXT: s_cbranch_execz BB8_4
; CGP-NEXT: ; %bb.3:
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v10
; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
-; CGP-NEXT: v_mul_lo_u32 v1, v0, v10
-; CGP-NEXT: v_mul_hi_u32 v4, v0, v10
-; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
-; CGP-NEXT: v_mul_hi_u32 v1, v1, v0
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v0, v5
+; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
+; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT: v_mul_hi_u32 v0, v5, v0
; CGP-NEXT: v_mul_lo_u32 v0, v0, v10
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v0
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v1, v10
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v0
-; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v10
-; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v5, v0
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v10
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v10
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: BB8_4:
-; CGP-NEXT: s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
; CGP-NEXT: v_or_b32_e32 v5, v3, v9
; CGP-NEXT: v_mov_b32_e32 v4, 0
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
; CGP-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
; CGP-NEXT: v_cndmask_b32_e32 v5, v6, v3, vcc
; CGP-NEXT: BB8_6: ; %Flow
-; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7]
-; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
; CGP-NEXT: s_cbranch_execz BB8_8
; CGP-NEXT: ; %bb.7:
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v8
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v8
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v8
-; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v4, v3
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_mul_lo_u32 v3, v3, v8
-; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v8
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v8
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3
-; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v8
-; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v8
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v8
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8
+; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
; CGP-NEXT: v_mov_b32_e32 v5, 0
; CGP-NEXT: BB8_8:
-; CGP-NEXT: s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
; CGP-NEXT: v_mov_b32_e32 v2, v4
; CGP-NEXT: v_mov_b32_e32 v3, v5
; CGP-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
; GISEL-NEXT: v_and_b32_e32 v1, s4, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
-; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3
-; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2
-; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1
-; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;