// %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
// %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
// %3 = mul nuw iN (%LHS.LO as iN), (%RHS.LO as iN)
- // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh
- // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 )
+ // %4 = add iNh %1.0, %2.0
+ // %5 = { iNh, i1 } @uadd.with.overflow.iNh(iNh %4, iNh %3.HI)
//
- // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 }
+ // %lo = %3.LO
+ // %hi = %5.0
+ // %ovf = %0 || %1.1 || %2.1 || %5.1
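+ //
+ // Worked example (N = 8, Nh = 4): %LHS = 0x12, %RHS = 0x0a.
+ //   %0 = (0x1 != 0) && (0x0 != 0)        = false
+ //   %1 = umul.with.overflow.i4(0x1, 0xa) = { 0xa, 0 }
+ //   %2 = umul.with.overflow.i4(0x0, 0x2) = { 0x0, 0 }
+ //   %3 = 0x02 * 0x0a                     = 0x14  (%3.HI = 0x1, %3.LO = 0x4)
+ //   %4 = 0xa + 0x0                       = 0xa
+ //   %5 = uadd.with.overflow.i4(0xa, 0x1) = { 0xb, 0 }
+ //   => %lo = 0x4, %hi = 0xb (0xb4 = 18 * 10), %ovf = false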
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
GetExpandedInteger(LHS, LHSLow, LHSHigh);
GetExpandedInteger(RHS, RHSLow, RHSHigh);
EVT HalfVT = LHSLow.getValueType();
EVT BitVT = N->getValueType(1);
- SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT);
- SDVTList VTFullAddO = DAG.getVTList(VT, BitVT);
+ SDVTList VTHalfWithO = DAG.getVTList(HalfVT, BitVT);
SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
- SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow);
+ SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, LHSHigh, RHSLow);
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
- SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
- One.getValue(0));
- SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow);
+ SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, RHSHigh, LHSLow);
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
- SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
- Two.getValue(0));
+
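+ // The plain ADD needs no overflow check of its own: if %1.0 + %2.0 wrapped,
+ // both addends would be non-zero, hence %LHS.HI and %RHS.HI would be
+ // non-zero, and the SETNE/AND sequence above has already set Overflow.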
+ SDValue HighSum = DAG.getNode(ISD::ADD, dl, HalfVT, One, Two);
// Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
// know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn’t this
// handled via `TargetLowering::expandMUL_LOHI`?). Multiply the zero-extended
// low halves with a plain wide `MUL` instead.
SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
- SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh);
- SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four);
- Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1));
- SplitInteger(Five, Lo, Hi);
+ SplitInteger(Three, Lo, Hi);
+
+ Hi = DAG.getNode(ISD::UADDO, dl, VTHalfWithO, Hi, HighSum);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
}
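
For reference, the same recipe written out as plain C++ for N = 128 and
Nh = 64 (a minimal sketch, not part of the patch: the U128 struct, the helper
names, and the use of Clang/GCC's `unsigned __int128` for the 64x64->128
low-half product are all illustrative assumptions):

#include <cassert>
#include <cstdint>

struct U128 { uint64_t Lo, Hi; };

// iNh umul.with.overflow: 64x64->64 multiply that reports lost high bits.
static bool umulo64(uint64_t A, uint64_t B, uint64_t &Out) {
  unsigned __int128 P = (unsigned __int128)A * B;
  Out = (uint64_t)P;
  return (uint64_t)(P >> 64) != 0;
}

// Returns true on overflow; each step is labelled with the matching
// pseudo-instruction from the comment above.
static bool umulo128(U128 L, U128 R, U128 &Res) {
  bool Ovf = L.Hi != 0 && R.Hi != 0;              // %0
  uint64_t One, Two;
  Ovf |= umulo64(L.Hi, R.Lo, One);                // %1
  Ovf |= umulo64(R.Hi, L.Lo, Two);                // %2
  unsigned __int128 Three =
      (unsigned __int128)L.Lo * R.Lo;             // %3 = mul nuw
  uint64_t HighSum = One + Two;                   // %4
  Res.Hi = (uint64_t)(Three >> 64) + HighSum;     // %5.0
  Ovf |= Res.Hi < HighSum;                        // %5.1 (carry out)
  Res.Lo = (uint64_t)Three;                       // %lo
  return Ovf;                                     // %ovf
}

int main() {
  U128 Res;
  assert(!umulo128({10, 0}, {18, 0}, Res) && Res.Lo == 180 && Res.Hi == 0);
  assert(umulo128({0, 1}, {0, 1}, Res)); // 2^64 * 2^64 overflows i128
  return 0;
}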
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; AARCH-LABEL: muloti_test:
; AARCH: // %bb.0: // %start
-; AARCH-NEXT: mul x8, x3, x0
-; AARCH-NEXT: umulh x9, x0, x2
-; AARCH-NEXT: madd x11, x1, x2, x8
-; AARCH-NEXT: add x8, x9, x11
-; AARCH-NEXT: cmp x8, x9
-; AARCH-NEXT: cset w9, lo
-; AARCH-NEXT: cmp x11, #0 // =0
-; AARCH-NEXT: csel w9, wzr, w9, eq
; AARCH-NEXT: cmp x3, #0 // =0
-; AARCH-NEXT: umulh x10, x1, x2
-; AARCH-NEXT: cset w12, ne
+; AARCH-NEXT: umulh x8, x1, x2
+; AARCH-NEXT: cset w10, ne
; AARCH-NEXT: cmp x1, #0 // =0
-; AARCH-NEXT: umulh x11, x3, x0
-; AARCH-NEXT: cset w13, ne
-; AARCH-NEXT: cmp xzr, x10
-; AARCH-NEXT: and w10, w13, w12
-; AARCH-NEXT: cset w12, ne
-; AARCH-NEXT: cmp xzr, x11
-; AARCH-NEXT: orr w10, w10, w12
+; AARCH-NEXT: mul x9, x3, x0
; AARCH-NEXT: cset w11, ne
+; AARCH-NEXT: cmp xzr, x8
+; AARCH-NEXT: umulh x8, x3, x0
+; AARCH-NEXT: madd x9, x1, x2, x9
+; AARCH-NEXT: and w10, w11, w10
+; AARCH-NEXT: cset w11, ne
+; AARCH-NEXT: cmp xzr, x8
+; AARCH-NEXT: umulh x8, x0, x2
; AARCH-NEXT: orr w10, w10, w11
-; AARCH-NEXT: orr w9, w10, w9
+; AARCH-NEXT: cset w11, ne
+; AARCH-NEXT: adds x1, x8, x9
+; AARCH-NEXT: orr w8, w10, w11
+; AARCH-NEXT: cset w9, hs
+; AARCH-NEXT: orr w8, w8, w9
; AARCH-NEXT: mul x0, x0, x2
-; AARCH-DAG: mov x1, x8
-; AARCH-DAG: mov w2, w9
+; AARCH-NEXT: mov w2, w8
; AARCH-NEXT: ret
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
; CHECK-LABEL: umulo_v2i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: mul x9, x7, x2
-; CHECK-NEXT: umulh x10, x2, x6
-; CHECK-NEXT: madd x9, x3, x6, x9
-; CHECK-NEXT: add x15, x10, x9
-; CHECK-NEXT: cmp x15, x10
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: cmp x9, #0 // =0
-; CHECK-NEXT: csel w10, wzr, w10, eq
; CHECK-NEXT: cmp x7, #0 // =0
-; CHECK-NEXT: umulh x11, x3, x6
-; CHECK-NEXT: mul x13, x5, x0
-; CHECK-NEXT: cset w17, ne
+; CHECK-NEXT: umulh x8, x3, x6
+; CHECK-NEXT: cset w13, ne
; CHECK-NEXT: cmp x3, #0 // =0
-; CHECK-NEXT: umulh x12, x7, x2
-; CHECK-NEXT: umulh x9, x0, x4
-; CHECK-NEXT: madd x13, x1, x4, x13
-; CHECK-NEXT: cset w18, ne
-; CHECK-NEXT: cmp xzr, x11
+; CHECK-NEXT: umulh x9, x7, x2
+; CHECK-NEXT: mul x10, x7, x2
+; CHECK-NEXT: cset w14, ne
+; CHECK-NEXT: cmp xzr, x8
; CHECK-NEXT: ldr x8, [sp]
-; CHECK-NEXT: add x11, x9, x13
-; CHECK-NEXT: and w17, w18, w17
-; CHECK-NEXT: cset w18, ne
-; CHECK-NEXT: cmp xzr, x12
-; CHECK-NEXT: orr w12, w17, w18
-; CHECK-NEXT: cset w17, ne
-; CHECK-NEXT: cmp x11, x9
-; CHECK-NEXT: orr w9, w12, w17
-; CHECK-NEXT: cset w12, lo
-; CHECK-NEXT: cmp x13, #0 // =0
-; CHECK-NEXT: mul x14, x2, x6
-; CHECK-NEXT: csel w12, wzr, w12, eq
-; CHECK-NEXT: cmp x5, #0 // =0
-; CHECK-NEXT: stp x14, x15, [x8, #16]
-; CHECK-NEXT: umulh x14, x1, x4
-; CHECK-NEXT: cset w13, ne
-; CHECK-NEXT: cmp x1, #0 // =0
-; CHECK-NEXT: umulh x16, x5, x0
-; CHECK-NEXT: cset w17, ne
-; CHECK-NEXT: cmp xzr, x14
-; CHECK-NEXT: and w13, w17, w13
+; CHECK-NEXT: umulh x11, x2, x6
+; CHECK-NEXT: madd x10, x3, x6, x10
+; CHECK-NEXT: and w13, w14, w13
; CHECK-NEXT: cset w14, ne
-; CHECK-NEXT: cmp xzr, x16
+; CHECK-NEXT: cmp xzr, x9
; CHECK-NEXT: orr w13, w13, w14
; CHECK-NEXT: cset w14, ne
+; CHECK-NEXT: adds x10, x11, x10
+; CHECK-NEXT: mul x12, x2, x6
; CHECK-NEXT: orr w13, w13, w14
-; CHECK-NEXT: orr w12, w13, w12
+; CHECK-NEXT: cset w14, hs
+; CHECK-NEXT: cmp x5, #0 // =0
+; CHECK-NEXT: umulh x17, x1, x4
+; CHECK-NEXT: stp x12, x10, [x8, #16]
+; CHECK-NEXT: cset w10, ne
+; CHECK-NEXT: cmp x1, #0 // =0
+; CHECK-NEXT: umulh x9, x5, x0
+; CHECK-NEXT: mul x11, x5, x0
+; CHECK-NEXT: cset w12, ne
+; CHECK-NEXT: cmp xzr, x17
+; CHECK-NEXT: umulh x15, x0, x4
+; CHECK-NEXT: madd x11, x1, x4, x11
+; CHECK-NEXT: and w10, w12, w10
+; CHECK-NEXT: cset w12, ne
+; CHECK-NEXT: cmp xzr, x9
+; CHECK-NEXT: orr w9, w10, w12
+; CHECK-NEXT: cset w10, ne
+; CHECK-NEXT: adds x11, x15, x11
+; CHECK-NEXT: orr w9, w9, w10
+; CHECK-NEXT: cset w10, hs
; CHECK-NEXT: orr w9, w9, w10
-; CHECK-NEXT: fmov s0, w12
-; CHECK-NEXT: mov v0.s[1], w9
+; CHECK-NEXT: orr w10, w13, w14
+; CHECK-NEXT: fmov s0, w9
+; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: shl v0.2s, v0.2s, #31
-; CHECK-NEXT: mul x15, x0, x4
+; CHECK-NEXT: mul x16, x0, x4
; CHECK-NEXT: sshr v0.2s, v0.2s, #31
-; CHECK-NEXT: stp x15, x11, [x8]
+; CHECK-NEXT: stp x16, x11, [x8]
; CHECK-NEXT: ret
%t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
; CHECK-NEXT: cmpdi r3, 0
; CHECK-NEXT: sradi r4, r3, 63
; CHECK-NEXT: rldic r5, r5, 4, 32
-; CHECK-NEXT: crnot 4*cr5+gt, eq
+; CHECK-NEXT: crnot 4*cr5+lt, eq
; CHECK-NEXT: mulhdu r3, r3, r5
; CHECK-NEXT: maddld r6, r4, r5, r3
-; CHECK-NEXT: cmpld r6, r3
-; CHECK-NEXT: mulld r3, r4, r5
-; CHECK-NEXT: cmpldi cr1, r3, 0
-; CHECK-NEXT: crandc 4*cr5+lt, lt, 4*cr1+eq
+; CHECK-NEXT: cmpld cr1, r6, r3
; CHECK-NEXT: mulhdu. r3, r4, r5
-; CHECK-NEXT: bc 4, 4*cr5+gt, .LBB0_10
+; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10
; CHECK-NEXT: # %bb.8: # %L670
-; CHECK-NEXT: crorc 4*cr5+lt, 4*cr5+lt, eq
+; CHECK-NEXT: crorc 4*cr5+lt, 4*cr1+lt, eq
; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10
; CHECK-NEXT: # %bb.9: # %L917
; CHECK-NEXT: .LBB0_10: # %L994
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; PPC64-LABEL: muloti_test:
; PPC64: # %bb.0: # %start
-; PPC64-NEXT: mulhdu. 8, 3, 6
-; PPC64-NEXT: mcrf 1, 0
-; PPC64-NEXT: mulld 8, 5, 4
-; PPC64-NEXT: cmpdi 3, 0
-; PPC64-NEXT: mulld 3, 3, 6
-; PPC64-NEXT: cmpdi 5, 5, 0
-; PPC64-NEXT: add 3, 3, 8
-; PPC64-NEXT: crnor 20, 22, 2
-; PPC64-NEXT: mulhdu 8, 4, 6
-; PPC64-NEXT: cmpldi 3, 0
-; PPC64-NEXT: add 3, 8, 3
-; PPC64-NEXT: cmpld 6, 3, 8
-; PPC64-NEXT: crandc 21, 24, 2
-; PPC64-NEXT: crorc 20, 20, 6
-; PPC64-NEXT: mulhdu. 5, 5, 4
-; PPC64-NEXT: crorc 20, 20, 2
-; PPC64-NEXT: li 7, 1
-; PPC64-NEXT: crnor 20, 20, 21
+; PPC64-NEXT: addic 8, 5, -1
+; PPC64-NEXT: mulhdu 9, 5, 4
+; PPC64-NEXT: mulld 10, 5, 4
+; PPC64-NEXT: subfe 5, 8, 5
+; PPC64-NEXT: mulld 8, 3, 6
+; PPC64-NEXT: add 8, 8, 10
+; PPC64-NEXT: addic 10, 3, -1
+; PPC64-NEXT: mulhdu 7, 3, 6
+; PPC64-NEXT: subfe 3, 10, 3
+; PPC64-NEXT: and 5, 3, 5
+; PPC64-NEXT: addic 3, 7, -1
+; PPC64-NEXT: subfe 7, 3, 7
+; PPC64-NEXT: or 5, 5, 7
+; PPC64-NEXT: mulhdu 10, 4, 6
+; PPC64-NEXT: addic 7, 9, -1
+; PPC64-NEXT: add 3, 10, 8
+; PPC64-NEXT: subfe 7, 7, 9
+; PPC64-NEXT: or 5, 5, 7
+; PPC64-NEXT: subc 7, 3, 10
+; PPC64-NEXT: subfe 7, 3, 3
+; PPC64-NEXT: neg 7, 7
+; PPC64-NEXT: or 5, 5, 7
; PPC64-NEXT: mulld 4, 4, 6
-; PPC64-NEXT: bc 12, 20, .LBB0_2
-; PPC64-NEXT: # %bb.1: # %start
-; PPC64-NEXT: ori 5, 7, 0
-; PPC64-NEXT: blr
-; PPC64-NEXT: .LBB0_2: # %start
-; PPC64-NEXT: li 5, 0
; PPC64-NEXT: blr
;
; PPC32-LABEL: muloti_test:
; PPC32: # %bb.0: # %start
; PPC32-NEXT: mflr 0
; PPC32-NEXT: stw 0, 4(1)
-; PPC32-NEXT: stwu 1, -80(1)
-; PPC32-NEXT: stw 26, 56(1) # 4-byte Folded Spill
+; PPC32-NEXT: stwu 1, -64(1)
+; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill
; PPC32-NEXT: mfcr 12
-; PPC32-NEXT: stw 27, 60(1) # 4-byte Folded Spill
-; PPC32-NEXT: mr 27, 4
-; PPC32-NEXT: stw 29, 68(1) # 4-byte Folded Spill
-; PPC32-NEXT: mr 29, 7
-; PPC32-NEXT: stw 30, 72(1) # 4-byte Folded Spill
-; PPC32-NEXT: mr 30, 8
-; PPC32-NEXT: mr 26, 3
+; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill
+; PPC32-NEXT: mr 26, 7
+; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT: mr 28, 4
+; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill
+; PPC32-NEXT: mr 29, 8
+; PPC32-NEXT: mr 24, 3
; PPC32-NEXT: li 3, 0
; PPC32-NEXT: li 4, 0
; PPC32-NEXT: li 7, 0
; PPC32-NEXT: li 8, 0
-; PPC32-NEXT: stw 20, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT: stw 21, 36(1) # 4-byte Folded Spill
-; PPC32-NEXT: stw 22, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT: stw 23, 44(1) # 4-byte Folded Spill
-; PPC32-NEXT: mr 23, 6
-; PPC32-NEXT: stw 24, 48(1) # 4-byte Folded Spill
-; PPC32-NEXT: mr 24, 5
-; PPC32-NEXT: stw 25, 52(1) # 4-byte Folded Spill
-; PPC32-NEXT: mr 25, 10
-; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill
-; PPC32-NEXT: mr 28, 9
-; PPC32-NEXT: stw 12, 28(1)
+; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill
+; PPC32-NEXT: mr 23, 5
+; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT: mr 25, 9
+; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT: mr 27, 6
+; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill
+; PPC32-NEXT: mr 30, 10
+; PPC32-NEXT: stw 12, 24(1)
; PPC32-NEXT: bl __multi3
-; PPC32-NEXT: mr 7, 4
-; PPC32-NEXT: mullw 4, 24, 30
-; PPC32-NEXT: cmpwi 5, 24, 0
-; PPC32-NEXT: cmpwi 6, 26, 0
-; PPC32-NEXT: cmpwi 7, 28, 0
-; PPC32-NEXT: crnor 9, 30, 26
-; PPC32-NEXT: mullw 8, 29, 23
-; PPC32-NEXT: add 21, 8, 4
-; PPC32-NEXT: mullw 11, 28, 27
-; PPC32-NEXT: mullw 12, 26, 25
-; PPC32-NEXT: add 11, 12, 11
-; PPC32-NEXT: cmplwi 7, 11, 0
-; PPC32-NEXT: mulhwu 9, 30, 23
-; PPC32-NEXT: add 12, 9, 21
-; PPC32-NEXT: cmplw 6, 12, 9
-; PPC32-NEXT: mulhwu 10, 27, 25
-; PPC32-NEXT: mullw 0, 30, 23
-; PPC32-NEXT: mullw 22, 27, 25
-; PPC32-NEXT: addc 4, 22, 0
-; PPC32-NEXT: add 0, 10, 11
-; PPC32-NEXT: adde 8, 0, 12
-; PPC32-NEXT: addc 4, 7, 4
-; PPC32-NEXT: adde 8, 3, 8
-; PPC32-NEXT: xor 22, 4, 7
-; PPC32-NEXT: xor 20, 8, 3
-; PPC32-NEXT: or. 22, 22, 20
-; PPC32-NEXT: mcrf 1, 0
-; PPC32-NEXT: cmpwi 29, 0
-; PPC32-NEXT: crnor 8, 22, 2
-; PPC32-NEXT: mulhwu. 23, 29, 23
+; PPC32-NEXT: mulhwu. 9, 26, 27
+; PPC32-NEXT: mfcr 9 # cr0
+; PPC32-NEXT: cmpwi 2, 26, 0
+; PPC32-NEXT: stw 9, 20(1)
+; PPC32-NEXT: cmpwi 3, 23, 0
+; PPC32-NEXT: crnor 12, 14, 10
+; PPC32-NEXT: cmpwi 4, 24, 0
+; PPC32-NEXT: mulhwu. 9, 23, 29
; PPC32-NEXT: mcrf 5, 0
-; PPC32-NEXT: cmplwi 21, 0
-; PPC32-NEXT: crandc 10, 24, 2
-; PPC32-NEXT: cmplw 3, 0, 10
-; PPC32-NEXT: crandc 11, 12, 30
+; PPC32-NEXT: cmpwi 1, 25, 0
+; PPC32-NEXT: crnor 4, 6, 18
; PPC32-NEXT: mulhwu. 9, 24, 30
; PPC32-NEXT: mcrf 6, 0
-; PPC32-NEXT: cmplw 4, 7
-; PPC32-NEXT: cmplw 7, 8, 3
-; PPC32-NEXT: crand 12, 30, 0
-; PPC32-NEXT: crandc 13, 28, 30
-; PPC32-NEXT: mulhwu. 3, 26, 25
+; PPC32-NEXT: mulhwu. 0, 25, 28
; PPC32-NEXT: mcrf 7, 0
-; PPC32-NEXT: cror 0, 12, 13
-; PPC32-NEXT: crandc 12, 0, 6
-; PPC32-NEXT: crorc 20, 8, 22
-; PPC32-NEXT: crorc 20, 20, 26
-; PPC32-NEXT: mulhwu. 3, 28, 27
-; PPC32-NEXT: mcrf 1, 0
-; PPC32-NEXT: crorc 25, 9, 30
-; PPC32-NEXT: or. 3, 27, 26
-; PPC32-NEXT: cror 24, 20, 10
-; PPC32-NEXT: mcrf 5, 0
-; PPC32-NEXT: crorc 25, 25, 6
-; PPC32-NEXT: or. 3, 30, 29
-; PPC32-NEXT: cror 25, 25, 11
-; PPC32-NEXT: crnor 20, 2, 22
-; PPC32-NEXT: lwz 12, 28(1)
-; PPC32-NEXT: cror 20, 20, 25
-; PPC32-NEXT: cror 20, 20, 24
-; PPC32-NEXT: crnor 20, 20, 12
+; PPC32-NEXT: or. 0, 28, 24
+; PPC32-NEXT: mcrf 2, 0
+; PPC32-NEXT: or. 0, 29, 26
+; PPC32-NEXT: crnor 5, 2, 10
+; PPC32-NEXT: mullw 10, 26, 27
+; PPC32-NEXT: lwz 26, 20(1)
+; PPC32-NEXT: mullw 9, 23, 29
+; PPC32-NEXT: add 9, 10, 9
+; PPC32-NEXT: mtcrf 128, 26 # cr0
+; PPC32-NEXT: crorc 6, 12, 2
+; PPC32-NEXT: crorc 20, 6, 22
+; PPC32-NEXT: mulhwu 7, 29, 27
+; PPC32-NEXT: add 9, 7, 9
+; PPC32-NEXT: cmplw 9, 7
+; PPC32-NEXT: crorc 21, 4, 26
+; PPC32-NEXT: cror 20, 20, 0
+; PPC32-NEXT: crorc 21, 21, 30
+; PPC32-NEXT: mullw 11, 25, 28
+; PPC32-NEXT: mullw 12, 24, 30
+; PPC32-NEXT: add 10, 12, 11
+; PPC32-NEXT: lwz 12, 24(1)
+; PPC32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload
+; PPC32-NEXT: mulhwu 8, 28, 30
+; PPC32-NEXT: add 10, 8, 10
+; PPC32-NEXT: cmplw 10, 8
+; PPC32-NEXT: cror 21, 21, 0
+; PPC32-NEXT: cror 21, 5, 21
+; PPC32-NEXT: cror 20, 21, 20
+; PPC32-NEXT: mullw 0, 29, 27
+; PPC32-NEXT: mtcrf 32, 12 # cr2
+; PPC32-NEXT: mtcrf 16, 12 # cr3
+; PPC32-NEXT: mtcrf 8, 12 # cr4
+; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
+; PPC32-NEXT: mullw 7, 28, 30
+; PPC32-NEXT: addc 7, 7, 0
+; PPC32-NEXT: adde 11, 10, 9
+; PPC32-NEXT: addc 9, 4, 7
+; PPC32-NEXT: adde 8, 3, 11
+; PPC32-NEXT: cmplw 6, 9, 4
+; PPC32-NEXT: cmplw 8, 3
+; PPC32-NEXT: crand 22, 2, 24
+; PPC32-NEXT: crandc 23, 0, 2
+; PPC32-NEXT: cror 22, 22, 23
+; PPC32-NEXT: crnor 20, 20, 22
; PPC32-NEXT: li 3, 1
; PPC32-NEXT: bc 12, 20, .LBB0_2
; PPC32-NEXT: # %bb.1: # %start
; PPC32-NEXT: li 7, 0
; PPC32-NEXT: .LBB0_3: # %start
; PPC32-NEXT: mr 3, 8
-; PPC32-NEXT: mtcrf 32, 12 # cr2
-; PPC32-NEXT: mtcrf 16, 12 # cr3
-; PPC32-NEXT: lwz 30, 72(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 29, 68(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 28, 64(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 27, 60(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 26, 56(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 25, 52(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 24, 48(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 23, 44(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 22, 40(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 21, 36(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 20, 32(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 0, 84(1)
-; PPC32-NEXT: addi 1, 1, 80
+; PPC32-NEXT: mr 4, 9
+; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 28, 48(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 27, 44(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 25, 36(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 24, 32(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 23, 28(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 0, 68(1)
+; PPC32-NEXT: addi 1, 1, 64
; PPC32-NEXT: mtlr 0
; PPC32-NEXT: blr
start:
; SPARC-LABEL: muloti_test:
; SPARC: .cfi_startproc
; SPARC-NEXT: ! %bb.0: ! %start
-; SPARC-NEXT: save %sp, -128, %sp
+; SPARC-NEXT: save %sp, -120, %sp
; SPARC-NEXT: .cfi_def_cfa_register %fp
; SPARC-NEXT: .cfi_window_save
; SPARC-NEXT: .cfi_register %o7, %i7
-; SPARC-NEXT: ld [%fp+92], %l3
+; SPARC-NEXT: ld [%fp+92], %l4
; SPARC-NEXT: ld [%fp+96], %g2
; SPARC-NEXT: umul %i2, %i5, %g3
; SPARC-NEXT: rd %y, %g4
-; SPARC-NEXT: st %g4, [%fp+-20] ! 4-byte Folded Spill
+; SPARC-NEXT: st %g4, [%fp+-12] ! 4-byte Folded Spill
; SPARC-NEXT: umul %i4, %i3, %g4
; SPARC-NEXT: rd %y, %l0
-; SPARC-NEXT: st %l0, [%fp+-24] ! 4-byte Folded Spill
+; SPARC-NEXT: st %l0, [%fp+-16] ! 4-byte Folded Spill
; SPARC-NEXT: st %g2, [%sp+96]
-; SPARC-NEXT: st %i5, [%fp+-8] ! 4-byte Folded Spill
; SPARC-NEXT: umul %i5, %i3, %l0
-; SPARC-NEXT: rd %y, %l5
-; SPARC-NEXT: st %l3, [%sp+92]
-; SPARC-NEXT: umul %l3, %i1, %l4
-; SPARC-NEXT: rd %y, %i5
-; SPARC-NEXT: st %i5, [%fp+-12] ! 4-byte Folded Spill
-; SPARC-NEXT: add %g4, %g3, %l2
-; SPARC-NEXT: mov %i0, %i5
-; SPARC-NEXT: umul %i0, %g2, %g3
-; SPARC-NEXT: rd %y, %i0
-; SPARC-NEXT: st %i0, [%fp+-16] ! 4-byte Folded Spill
-; SPARC-NEXT: add %l5, %l2, %l1
-; SPARC-NEXT: st %i1, [%fp+-4] ! 4-byte Folded Spill
-; SPARC-NEXT: umul %i1, %g2, %g2
; SPARC-NEXT: rd %y, %l6
-; SPARC-NEXT: add %g3, %l4, %i0
-; SPARC-NEXT: add %l6, %i0, %l7
-; SPARC-NEXT: addcc %g2, %l0, %l4
+; SPARC-NEXT: st %l4, [%sp+92]
+; SPARC-NEXT: umul %l4, %i1, %l2
+; SPARC-NEXT: rd %y, %l1
+; SPARC-NEXT: st %l1, [%fp+-4] ! 4-byte Folded Spill
+; SPARC-NEXT: add %g4, %g3, %g3
+; SPARC-NEXT: umul %i0, %g2, %g4
+; SPARC-NEXT: rd %y, %l1
+; SPARC-NEXT: st %l1, [%fp+-8] ! 4-byte Folded Spill
+; SPARC-NEXT: add %l6, %g3, %l3
+; SPARC-NEXT: umul %i1, %g2, %g2
+; SPARC-NEXT: rd %y, %l1
+; SPARC-NEXT: add %g4, %l2, %g3
+; SPARC-NEXT: add %l1, %g3, %l2
+; SPARC-NEXT: addcc %g2, %l0, %l7
; SPARC-NEXT: mov %g0, %l0
-; SPARC-NEXT: addxcc %l7, %l1, %i1
+; SPARC-NEXT: addxcc %l2, %l3, %l5
; SPARC-NEXT: mov %l0, %o0
; SPARC-NEXT: mov %l0, %o1
; SPARC-NEXT: mov %i2, %o2
; SPARC-NEXT: mov %l0, %o4
; SPARC-NEXT: call __multi3
; SPARC-NEXT: mov %l0, %o5
-; SPARC-NEXT: addcc %o1, %l4, %i3
-; SPARC-NEXT: addxcc %o0, %i1, %g2
+; SPARC-NEXT: addcc %o1, %l7, %i3
+; SPARC-NEXT: addxcc %o0, %l5, %g2
; SPARC-NEXT: mov 1, %g3
; SPARC-NEXT: cmp %g2, %o0
; SPARC-NEXT: bcs .LBB0_2
-; SPARC-NEXT: mov %g3, %g4
+; SPARC-NEXT: mov %g3, %o4
; SPARC-NEXT: ! %bb.1: ! %start
-; SPARC-NEXT: mov %l0, %g4
+; SPARC-NEXT: mov %l0, %o4
; SPARC-NEXT: .LBB0_2: ! %start
; SPARC-NEXT: cmp %i3, %o1
; SPARC-NEXT: bcs .LBB0_4
-; SPARC-NEXT: mov %g3, %o4
+; SPARC-NEXT: mov %g3, %g4
; SPARC-NEXT: ! %bb.3: ! %start
-; SPARC-NEXT: mov %l0, %o4
+; SPARC-NEXT: mov %l0, %g4
; SPARC-NEXT: .LBB0_4: ! %start
; SPARC-NEXT: cmp %g2, %o0
; SPARC-NEXT: be .LBB0_6
; SPARC-NEXT: nop
; SPARC-NEXT: ! %bb.5: ! %start
-; SPARC-NEXT: mov %g4, %o4
+; SPARC-NEXT: mov %o4, %g4
; SPARC-NEXT: .LBB0_6: ! %start
-; SPARC-NEXT: xor %g2, %o0, %i1
-; SPARC-NEXT: xor %i3, %o1, %g4
-; SPARC-NEXT: or %g4, %i1, %i1
-; SPARC-NEXT: cmp %i1, 0
-; SPARC-NEXT: be .LBB0_8
-; SPARC-NEXT: mov %l0, %g4
+; SPARC-NEXT: cmp %i2, 0
+; SPARC-NEXT: bne .LBB0_8
+; SPARC-NEXT: mov %g3, %i2
; SPARC-NEXT: ! %bb.7: ! %start
-; SPARC-NEXT: mov %o4, %g4
+; SPARC-NEXT: mov %l0, %i2
; SPARC-NEXT: .LBB0_8: ! %start
-; SPARC-NEXT: cmp %l1, %l5
-; SPARC-NEXT: mov %g3, %l1
-; SPARC-NEXT: bcs .LBB0_10
-; SPARC-NEXT: mov %i5, %i1
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bne .LBB0_10
+; SPARC-NEXT: mov %g3, %o1
; SPARC-NEXT: ! %bb.9: ! %start
-; SPARC-NEXT: mov %l0, %l1
+; SPARC-NEXT: mov %l0, %o1
; SPARC-NEXT: .LBB0_10: ! %start
-; SPARC-NEXT: cmp %l2, 0
-; SPARC-NEXT: be .LBB0_12
-; SPARC-NEXT: mov %l0, %o0
+; SPARC-NEXT: ld [%fp+-16], %l5 ! 4-byte Folded Reload
+; SPARC-NEXT: cmp %l5, 0
+; SPARC-NEXT: bne .LBB0_12
+; SPARC-NEXT: mov %g3, %o0
; SPARC-NEXT: ! %bb.11: ! %start
-; SPARC-NEXT: mov %l1, %o0
+; SPARC-NEXT: mov %l0, %o0
; SPARC-NEXT: .LBB0_12: ! %start
-; SPARC-NEXT: cmp %i2, 0
+; SPARC-NEXT: ld [%fp+-12], %l5 ! 4-byte Folded Reload
+; SPARC-NEXT: cmp %l5, 0
; SPARC-NEXT: bne .LBB0_14
-; SPARC-NEXT: mov %g3, %i2
+; SPARC-NEXT: mov %g3, %l5
; SPARC-NEXT: ! %bb.13: ! %start
-; SPARC-NEXT: mov %l0, %i2
+; SPARC-NEXT: mov %l0, %l5
; SPARC-NEXT: .LBB0_14: ! %start
-; SPARC-NEXT: cmp %i4, 0
-; SPARC-NEXT: bne .LBB0_16
-; SPARC-NEXT: mov %g3, %o1
+; SPARC-NEXT: cmp %l3, %l6
+; SPARC-NEXT: bcs .LBB0_16
+; SPARC-NEXT: mov %g3, %l3
; SPARC-NEXT: ! %bb.15: ! %start
-; SPARC-NEXT: mov %l0, %o1
+; SPARC-NEXT: mov %l0, %l3
; SPARC-NEXT: .LBB0_16: ! %start
-; SPARC-NEXT: ld [%fp+-24], %i5 ! 4-byte Folded Reload
-; SPARC-NEXT: cmp %i5, 0
+; SPARC-NEXT: cmp %l4, 0
; SPARC-NEXT: bne .LBB0_18
-; SPARC-NEXT: mov %g3, %l5
+; SPARC-NEXT: mov %g3, %l4
; SPARC-NEXT: ! %bb.17: ! %start
-; SPARC-NEXT: mov %l0, %l5
+; SPARC-NEXT: mov %l0, %l4
; SPARC-NEXT: .LBB0_18: ! %start
-; SPARC-NEXT: ld [%fp+-20], %i5 ! 4-byte Folded Reload
-; SPARC-NEXT: cmp %i5, 0
+; SPARC-NEXT: cmp %i0, 0
; SPARC-NEXT: bne .LBB0_20
-; SPARC-NEXT: mov %g3, %l1
+; SPARC-NEXT: mov %g3, %l7
; SPARC-NEXT: ! %bb.19: ! %start
-; SPARC-NEXT: mov %l0, %l1
+; SPARC-NEXT: mov %l0, %l7
; SPARC-NEXT: .LBB0_20: ! %start
-; SPARC-NEXT: cmp %l7, %l6
-; SPARC-NEXT: bcs .LBB0_22
+; SPARC-NEXT: ld [%fp+-8], %l6 ! 4-byte Folded Reload
+; SPARC-NEXT: cmp %l6, 0
+; SPARC-NEXT: bne .LBB0_22
; SPARC-NEXT: mov %g3, %l6
; SPARC-NEXT: ! %bb.21: ! %start
; SPARC-NEXT: mov %l0, %l6
; SPARC-NEXT: .LBB0_22: ! %start
-; SPARC-NEXT: cmp %i0, 0
-; SPARC-NEXT: be .LBB0_24
-; SPARC-NEXT: mov %l0, %l2
+; SPARC-NEXT: and %o1, %i2, %i2
+; SPARC-NEXT: ld [%fp+-4], %o1 ! 4-byte Folded Reload
+; SPARC-NEXT: cmp %o1, 0
+; SPARC-NEXT: and %l7, %l4, %o1
+; SPARC-NEXT: bne .LBB0_24
+; SPARC-NEXT: mov %g3, %l4
; SPARC-NEXT: ! %bb.23: ! %start
-; SPARC-NEXT: mov %l6, %l2
+; SPARC-NEXT: mov %l0, %l4
; SPARC-NEXT: .LBB0_24: ! %start
-; SPARC-NEXT: cmp %l3, 0
-; SPARC-NEXT: bne .LBB0_26
-; SPARC-NEXT: mov %g3, %l3
+; SPARC-NEXT: or %i2, %o0, %l7
+; SPARC-NEXT: cmp %l2, %l1
+; SPARC-NEXT: or %o1, %l6, %l2
+; SPARC-NEXT: bcs .LBB0_26
+; SPARC-NEXT: mov %g3, %i2
; SPARC-NEXT: ! %bb.25: ! %start
-; SPARC-NEXT: mov %l0, %l3
+; SPARC-NEXT: mov %l0, %i2
; SPARC-NEXT: .LBB0_26: ! %start
-; SPARC-NEXT: cmp %i1, 0
+; SPARC-NEXT: or %l7, %l5, %l1
+; SPARC-NEXT: or %i5, %i4, %i4
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: or %l2, %l4, %l2
; SPARC-NEXT: bne .LBB0_28
-; SPARC-NEXT: mov %g3, %l4
+; SPARC-NEXT: mov %g3, %i4
; SPARC-NEXT: ! %bb.27: ! %start
-; SPARC-NEXT: mov %l0, %l4
+; SPARC-NEXT: mov %l0, %i4
; SPARC-NEXT: .LBB0_28: ! %start
-; SPARC-NEXT: and %o1, %i2, %i2
-; SPARC-NEXT: ld [%fp+-16], %i0 ! 4-byte Folded Reload
+; SPARC-NEXT: or %l1, %l3, %i5
+; SPARC-NEXT: or %i1, %i0, %i0
; SPARC-NEXT: cmp %i0, 0
-; SPARC-NEXT: and %l4, %l3, %l4
; SPARC-NEXT: bne .LBB0_30
-; SPARC-NEXT: mov %g3, %l6
+; SPARC-NEXT: or %l2, %i2, %i0
; SPARC-NEXT: ! %bb.29: ! %start
-; SPARC-NEXT: mov %l0, %l6
-; SPARC-NEXT: .LBB0_30: ! %start
-; SPARC-NEXT: or %i2, %l5, %l3
-; SPARC-NEXT: ld [%fp+-12], %i0 ! 4-byte Folded Reload
-; SPARC-NEXT: cmp %i0, 0
-; SPARC-NEXT: or %l4, %l6, %i2
-; SPARC-NEXT: bne .LBB0_32
-; SPARC-NEXT: mov %g3, %l4
-; SPARC-NEXT: ! %bb.31: ! %start
-; SPARC-NEXT: mov %l0, %l4
-; SPARC-NEXT: .LBB0_32: ! %start
-; SPARC-NEXT: or %l3, %l1, %l1
-; SPARC-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload
-; SPARC-NEXT: or %i0, %i4, %i0
-; SPARC-NEXT: cmp %i0, 0
-; SPARC-NEXT: or %i2, %l4, %i5
-; SPARC-NEXT: bne .LBB0_34
-; SPARC-NEXT: mov %g3, %i2
-; SPARC-NEXT: ! %bb.33: ! %start
-; SPARC-NEXT: mov %l0, %i2
-; SPARC-NEXT: .LBB0_34: ! %start
-; SPARC-NEXT: or %l1, %o0, %i4
-; SPARC-NEXT: ld [%fp+-4], %i0 ! 4-byte Folded Reload
-; SPARC-NEXT: or %i0, %i1, %i0
-; SPARC-NEXT: cmp %i0, 0
-; SPARC-NEXT: bne .LBB0_36
-; SPARC-NEXT: or %i5, %l2, %i0
-; SPARC-NEXT: ! %bb.35: ! %start
; SPARC-NEXT: mov %l0, %g3
-; SPARC-NEXT: .LBB0_36: ! %start
-; SPARC-NEXT: and %g3, %i2, %i1
+; SPARC-NEXT: .LBB0_30: ! %start
+; SPARC-NEXT: and %g3, %i4, %i1
; SPARC-NEXT: or %i1, %i0, %i0
-; SPARC-NEXT: or %i0, %i4, %i0
+; SPARC-NEXT: or %i0, %i5, %i0
; SPARC-NEXT: or %i0, %g4, %i0
; SPARC-NEXT: and %i0, 1, %i4
; SPARC-NEXT: mov %g2, %i0
; SPARC64-NEXT: add %o0, %i5, %i1
; SPARC64-NEXT: mov %g0, %i3
; SPARC64-NEXT: cmp %i1, %o0
-; SPARC64-NEXT: mov %i3, %g2
-; SPARC64-NEXT: movcs %xcc, 1, %g2
-; SPARC64-NEXT: cmp %i5, 0
-; SPARC64-NEXT: move %xcc, 0, %g2
+; SPARC64-NEXT: mov %i3, %i5
+; SPARC64-NEXT: movcs %xcc, 1, %i5
; SPARC64-NEXT: cmp %i4, 0
; SPARC64-NEXT: mov %i3, %i4
; SPARC64-NEXT: movne %xcc, 1, %i4
; SPARC64-NEXT: cmp %l0, 0
-; SPARC64-NEXT: mov %i3, %i5
-; SPARC64-NEXT: movne %xcc, 1, %i5
+; SPARC64-NEXT: mov %i3, %g2
+; SPARC64-NEXT: movne %xcc, 1, %g2
; SPARC64-NEXT: cmp %i2, 0
; SPARC64-NEXT: mov %i3, %i2
; SPARC64-NEXT: movne %xcc, 1, %i2
; SPARC64-NEXT: cmp %i0, 0
; SPARC64-NEXT: movne %xcc, 1, %i3
; SPARC64-NEXT: and %i3, %i2, %i0
-; SPARC64-NEXT: or %i0, %i5, %i0
-; SPARC64-NEXT: or %i0, %i4, %i0
; SPARC64-NEXT: or %i0, %g2, %i0
+; SPARC64-NEXT: or %i0, %i4, %i0
+; SPARC64-NEXT: or %i0, %i5, %i0
; SPARC64-NEXT: srl %i0, 0, %i2
; SPARC64-NEXT: mov %i1, %i0
; SPARC64-NEXT: ret