SDValue LL = SDValue(), SDValue LH = SDValue(),
SDValue RL = SDValue(), SDValue RH = SDValue()) const;
+ /// Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit
+ /// urem by constant and other arithmetic ops. The n/2-bit urem by constant
+ /// will be expanded by DAGCombiner. This is not possible for all constant
+ /// divisors.
+ /// \param N Node to expand
+ /// \param Result A vector that will be filled with the low and high parts of
+ /// the results. For *DIVREM, this will be the quotient parts followed
+ /// by the remainder parts.
+ /// \param HiLoVT The value type to use for the Lo and Hi parts. Should be
+ /// half of VT.
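+ /// \param DAG The SelectionDAG to use for building the expansion.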
+ /// \param LL Low bits of the LHS of the operation. You can use this
+ /// parameter if you want to control how the low bits are extracted from
+ /// the LHS.
+ /// \param LH High bits of the LHS of the operation. See LL for meaning.
+ /// \returns true if the node has been expanded, false if it has not.
+ bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl<SDValue> &Result,
+ EVT HiLoVT, SelectionDAG &DAG,
+ SDValue LL = SDValue(),
+ SDValue LH = SDValue()) const;
+
/// Expand funnel shift.
/// \param N Node to expand
/// \returns The expansion if successful, SDValue() otherwise
return;
}
+ // Try to expand UDIV by constant.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Only if the new type is legal.
+ if (isTypeLegal(NVT)) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ SmallVector<SDValue> Result;
+ if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) {
+ Lo = Result[0];
+ Hi = Result[1];
+ return;
+ }
+ }
+ }
+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::UDIV_I16;
return;
}
+ // Try to expand UREM by constant.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Only if the new type is legal.
+ if (isTypeLegal(NVT)) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ SmallVector<SDValue> Result;
+ if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) {
+ Lo = Result[0];
+ Hi = Result[1];
+ return;
+ }
+ }
+ }
+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::UREM_I16;
return Ok;
}
+// Optimize unsigned division or remainder by constants for types twice as large
+// as a legal VT.
+//
+// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder can be
+// computed as:
+//   Carry = __builtin_uadd_overflow(Lo, Hi, &Sum);
+//   Sum += Carry;
+//   Remainder = Sum % Constant
+// This is based on "Remainder by Summing Digits" from Hacker's Delight.
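+//
+// This works because the dividend is Hi * (1 << (BitWidth / 2)) + Lo, which
+// is congruent to Hi + Lo modulo Constant when
+// (1 << (BitWidth / 2)) % Constant == 1. A carry out of the half-width add
+// represents (1 << (BitWidth / 2)), which is also congruent to 1, so it can
+// be added back in.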
+//
+// For division, we can compute the remainder, subtract it from the dividend,
+// and then multiply by the multiplicative inverse of the divisor modulo
+// (1 << BitWidth) to get the quotient.
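+//
+// Worked example for BitWidth == 64 and Constant == 3 (2^32 % 3 == 1):
+//   x % 3 == (Lo + Hi + Carry) % 3, computed with a 32-bit urem
+//   x / 3 == (x - (x % 3)) * 0xAAAAAAAAAAAAAAAB (mod 2^64), since
+//            3 * 0xAAAAAAAAAAAAAAAB == 2^65 + 1 == 1 (mod 2^64)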
+bool TargetLowering::expandDIVREMByConstant(SDNode *N,
+ SmallVectorImpl<SDValue> &Result,
+ EVT HiLoVT, SelectionDAG &DAG,
+ SDValue LL, SDValue LH) const {
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+
+ // TODO: Support signed division/remainder.
+ if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
+ return false;
+ assert(
+ (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
+ "Unexpected opcode");
+
+ auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!CN)
+ return false;
+
+ const APInt &Divisor = CN->getAPIntValue();
+ unsigned BitWidth = Divisor.getBitWidth();
+ unsigned HBitWidth = BitWidth / 2;
+ assert(VT.getScalarSizeInBits() == BitWidth &&
+ HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
+
+ // Divisor needs to be less than (1 << HBitWidth).
+ APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
+ if (Divisor.uge(HalfMaxPlus1))
+ return false;
+
+ // We depend on the UREM by constant optimization in DAGCombiner, which
+ // requires a high-multiply operation (MULHU or UMUL_LOHI).
+ if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
+ !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
+ return false;
+
+ // Don't expand if optimizing for size.
+ if (DAG.shouldOptForSize())
+ return false;
+
+ // Early out for 0, 1, or even divisors.
+ if (Divisor.ule(1) || Divisor[0] == 0)
+ return false;
+
+ SDLoc dl(N);
+ SDValue Sum;
+
+ // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
+ // then add in the carry.
+ // TODO: If we can't split it in half, we might be able to split into 3 or
+ // more pieces using a smaller bit width.
+ if (HalfMaxPlus1.urem(Divisor).isOneValue()) {
+ assert(!LL == !LH && "Expected both input halves or no input halves!");
+ if (!LL) {
+ LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0, dl));
+ LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
+ DAG.getIntPtrConstant(1, dl));
+ }
+
+ // Use addcarry if we can, otherwise use a compare to detect overflow.
+ EVT SetCCType =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
+ if (isOperationLegalOrCustom(ISD::ADDCARRY, HiLoVT)) {
+ SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
+ Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
+ Sum = DAG.getNode(ISD::ADDCARRY, dl, VTList, Sum,
+ DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
+ } else {
+ Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
+ SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
+ // If the boolean for the target is 0 or 1, we can add the setcc result
+ // directly.
+ if (getBooleanContents(HiLoVT) ==
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
+ else
+ Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
+ DAG.getConstant(0, dl, HiLoVT));
+ Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
+ }
+ }
+
+ // If we didn't find a sum, we can't do the expansion.
+ if (!Sum)
+ return false;
+
+ // Perform a HiLoVT urem on the Sum using the truncated divisor.
+ SDValue RemL =
+ DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
+ DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
+ // The high half of the remainder is 0 because the remainder is less than
+ // the divisor, which fits in HiLoVT.
+ SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
+
+ // If we only want remainder, we're done.
+ if (Opcode == ISD::UREM) {
+ Result.push_back(RemL);
+ Result.push_back(RemH);
+ return true;
+ }
+
+ // Otherwise, we need to compute the quotient.
+
+ // Join the remainder halves.
+ SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
+
+ // Subtract the remainder from the input.
+ SDValue In = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Rem);
+
+ // Multiply by the multiplicative inverse of the divisor modulo
+ // (1 << BitWidth).
+ APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
+ APInt MulFactor = Divisor.zext(BitWidth + 1);
+ MulFactor = MulFactor.multiplicativeInverse(Mod);
+ MulFactor = MulFactor.trunc(BitWidth);
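+ // The inverse exists because the divisor is odd; even divisors were
+ // rejected above. For example, for Divisor == 5 and BitWidth == 64,
+ // MulFactor == 0xCCCCCCCCCCCCCCCD and 5 * 0xCCCCCCCCCCCCCCCD == 1 (mod 2^64).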
+
+ SDValue Quotient =
+ DAG.getNode(ISD::MUL, dl, VT, In, DAG.getConstant(MulFactor, dl, VT));
+
+ // Split the quotient into low and high parts.
+ SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
+ DAG.getIntPtrConstant(0, dl));
+ SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
+ DAG.getIntPtrConstant(1, dl));
+ Result.push_back(QuotL);
+ Result.push_back(QuotH);
+ // For DIVREM, also return the remainder parts.
+ if (Opcode == ISD::UDIVREM) {
+ Result.push_back(RemL);
+ Result.push_back(RemH);
+ }
+
+ return true;
+}
+
// Check that (every element of) Z is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
return ISD::matchUnaryPredicate(
"Invalid opcode for Div/Rem lowering");
bool isSigned = (Opcode == ISD::SDIVREM);
EVT VT = Op->getValueType(0);
- Type *Ty = VT.getTypeForEVT(*DAG.getContext());
SDLoc dl(Op);
+ if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
+ SmallVector<SDValue> Result;
+ if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
+ SDValue Res0 =
+ DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
+ SDValue Res1 =
+ DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
+ {Res0, Res1});
+ }
+ }
+
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
// If the target has hardware divide, use divide + multiply + subtract:
// div = a / b
// rem = a - b * div
// Lowers REM using divmod helpers
// see RTABI section 4.2/4.3
SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
+ SmallVector<SDValue> Result;
+ if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
+ return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
+ Result[0], Result[1]);
+ }
+
// Build return types (div and rem)
std::vector<Type*> RetTyParams;
Type *RetTyElement;
- switch (N->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
"Unexpected return type for lowering");
+ if (isa<ConstantSDNode>(Op->getOperand(1))) {
+ SmallVector<SDValue> Result;
+ if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i64, DAG))
+ return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), VT, Result[0], Result[1]);
+ }
+
RTLIB::Libcall LC;
bool isSigned;
switch (Op->getOpcode()) {
%tmp1 = urem i64 %a, %b ; <i64> [#uses=1]
ret i64 %tmp1
}
+
+; Make sure we avoid a libcall for some constants, such as 3 (2^32 % 3 == 1).
+define i64 @f7(i64 %a) {
+; CHECK-SWDIV-LABEL: f7
+; CHECK-SWDIV: adc
+; CHECK-SWDIV: umull
+; CHECK-HWDIV-LABEL: f7
+; CHECK-HWDIV: adc
+; CHECK-HWDIV: umull
+; CHECK-EABI-LABEL: f7
+; CHECK-EABI: adc
+; CHECK-EABI: umull
+ %tmp1 = urem i64 %a, 3
+ ret i64 %tmp1
+}
+
+; Make sure we avoid a libcall for some constants, such as 3 (2^32 % 3 == 1).
+define i64 @f8(i64 %a) {
+; CHECK-SWDIV-LABEL: f8
+; CHECK-SWDIV: adc
+; CHECK-SWDIV: umull
+; CHECK-HWDIV-LABEL: f8
+; CHECK-HWDIV: adc
+; CHECK-HWDIV: umull
+; CHECK-EABI-LABEL: f8
+; CHECK-EABI: adc
+; CHECK-EABI: umull
+ %tmp1 = udiv i64 %a, 3
+ ret i64 %tmp1
+}
define i64 @udiv64_constant_no_add(i64 %a) nounwind {
; RV32-LABEL: udiv64_constant_no_add:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 5
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a2, a0, a1
+; RV32-NEXT: sltu a3, a2, a0
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: lui a3, 838861
+; RV32-NEXT: addi a4, a3, -819
+; RV32-NEXT: mulhu a5, a2, a4
+; RV32-NEXT: srli a6, a5, 2
+; RV32-NEXT: andi a5, a5, -4
+; RV32-NEXT: add a5, a5, a6
+; RV32-NEXT: sub a2, a2, a5
+; RV32-NEXT: sub a5, a0, a2
+; RV32-NEXT: addi a3, a3, -820
+; RV32-NEXT: mul a3, a5, a3
+; RV32-NEXT: mulhu a6, a5, a4
+; RV32-NEXT: add a3, a6, a3
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: mul a0, a0, a4
+; RV32-NEXT: add a1, a3, a0
+; RV32-NEXT: mul a0, a5, a4
; RV32-NEXT: ret
;
; RV64-LABEL: udiv64_constant_no_add:
;
; RV32IM-LABEL: udiv64_constant:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -16
-; RV32IM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: li a2, 5
-; RV32IM-NEXT: li a3, 0
-; RV32IM-NEXT: call __udivdi3@plt
-; RV32IM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 16
+; RV32IM-NEXT: add a2, a0, a1
+; RV32IM-NEXT: sltu a3, a2, a0
+; RV32IM-NEXT: add a2, a2, a3
+; RV32IM-NEXT: lui a3, 838861
+; RV32IM-NEXT: addi a4, a3, -819
+; RV32IM-NEXT: mulhu a5, a2, a4
+; RV32IM-NEXT: srli a6, a5, 2
+; RV32IM-NEXT: andi a5, a5, -4
+; RV32IM-NEXT: add a5, a5, a6
+; RV32IM-NEXT: sub a2, a2, a5
+; RV32IM-NEXT: sub a5, a0, a2
+; RV32IM-NEXT: addi a3, a3, -820
+; RV32IM-NEXT: mul a3, a5, a3
+; RV32IM-NEXT: mulhu a6, a5, a4
+; RV32IM-NEXT: add a3, a6, a3
+; RV32IM-NEXT: sltu a0, a0, a2
+; RV32IM-NEXT: sub a0, a1, a0
+; RV32IM-NEXT: mul a0, a0, a4
+; RV32IM-NEXT: add a1, a3, a0
+; RV32IM-NEXT: mul a0, a5, a4
; RV32IM-NEXT: ret
;
; RV64I-LABEL: udiv64_constant:
define iXLen2 @test_udiv_3(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_3:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 3
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a2, a0, a1
+; RV32-NEXT: sltu a3, a2, a0
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: lui a3, 699051
+; RV32-NEXT: addi a4, a3, -1365
+; RV32-NEXT: mulhu a5, a2, a4
+; RV32-NEXT: srli a6, a5, 1
+; RV32-NEXT: andi a5, a5, -2
+; RV32-NEXT: add a5, a5, a6
+; RV32-NEXT: sub a2, a2, a5
+; RV32-NEXT: sub a5, a0, a2
+; RV32-NEXT: addi a3, a3, -1366
+; RV32-NEXT: mul a3, a5, a3
+; RV32-NEXT: mulhu a6, a5, a4
+; RV32-NEXT: add a3, a6, a3
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: mul a0, a0, a4
+; RV32-NEXT: add a1, a3, a0
+; RV32-NEXT: mul a0, a5, a4
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_3:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 3
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __udivti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI0_0)
+; RV64-NEXT: ld a2, %lo(.LCPI0_0)(a2)
+; RV64-NEXT: add a3, a0, a1
+; RV64-NEXT: sltu a4, a3, a0
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: mulhu a4, a3, a2
+; RV64-NEXT: srli a5, a4, 1
+; RV64-NEXT: andi a4, a4, -2
+; RV64-NEXT: lui a6, %hi(.LCPI0_1)
+; RV64-NEXT: ld a6, %lo(.LCPI0_1)(a6)
+; RV64-NEXT: add a4, a4, a5
+; RV64-NEXT: sub a3, a3, a4
+; RV64-NEXT: sub a4, a0, a3
+; RV64-NEXT: mul a5, a4, a6
+; RV64-NEXT: mulhu a6, a4, a2
+; RV64-NEXT: add a5, a6, a5
+; RV64-NEXT: sltu a0, a0, a3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: mul a0, a0, a2
+; RV64-NEXT: add a1, a5, a0
+; RV64-NEXT: mul a0, a4, a2
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 3
ret iXLen2 %a
define iXLen2 @test_udiv_5(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_5:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 5
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a2, a0, a1
+; RV32-NEXT: sltu a3, a2, a0
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: lui a3, 838861
+; RV32-NEXT: addi a4, a3, -819
+; RV32-NEXT: mulhu a5, a2, a4
+; RV32-NEXT: srli a6, a5, 2
+; RV32-NEXT: andi a5, a5, -4
+; RV32-NEXT: add a5, a5, a6
+; RV32-NEXT: sub a2, a2, a5
+; RV32-NEXT: sub a5, a0, a2
+; RV32-NEXT: addi a3, a3, -820
+; RV32-NEXT: mul a3, a5, a3
+; RV32-NEXT: mulhu a6, a5, a4
+; RV32-NEXT: add a3, a6, a3
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: mul a0, a0, a4
+; RV32-NEXT: add a1, a3, a0
+; RV32-NEXT: mul a0, a5, a4
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_5:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 5
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __udivti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI1_0)
+; RV64-NEXT: ld a2, %lo(.LCPI1_0)(a2)
+; RV64-NEXT: add a3, a0, a1
+; RV64-NEXT: sltu a4, a3, a0
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: mulhu a4, a3, a2
+; RV64-NEXT: srli a5, a4, 2
+; RV64-NEXT: andi a4, a4, -4
+; RV64-NEXT: lui a6, %hi(.LCPI1_1)
+; RV64-NEXT: ld a6, %lo(.LCPI1_1)(a6)
+; RV64-NEXT: add a4, a4, a5
+; RV64-NEXT: sub a3, a3, a4
+; RV64-NEXT: sub a4, a0, a3
+; RV64-NEXT: mul a5, a4, a6
+; RV64-NEXT: mulhu a6, a4, a2
+; RV64-NEXT: add a5, a6, a5
+; RV64-NEXT: sltu a0, a0, a3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: mul a0, a0, a2
+; RV64-NEXT: add a1, a5, a0
+; RV64-NEXT: mul a0, a4, a2
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 5
ret iXLen2 %a
define iXLen2 @test_udiv_15(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_15:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 15
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a2, a0, a1
+; RV32-NEXT: sltu a3, a2, a0
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: lui a3, 559241
+; RV32-NEXT: addi a3, a3, -1911
+; RV32-NEXT: mulhu a3, a2, a3
+; RV32-NEXT: srli a3, a3, 3
+; RV32-NEXT: slli a4, a3, 4
+; RV32-NEXT: sub a3, a3, a4
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: sub a3, a0, a2
+; RV32-NEXT: lui a4, 978671
+; RV32-NEXT: addi a5, a4, -274
+; RV32-NEXT: mul a5, a3, a5
+; RV32-NEXT: addi a4, a4, -273
+; RV32-NEXT: mulhu a6, a3, a4
+; RV32-NEXT: add a5, a6, a5
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: mul a0, a0, a4
+; RV32-NEXT: add a1, a5, a0
+; RV32-NEXT: mul a0, a3, a4
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_15:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 15
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __udivti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI4_0)
+; RV64-NEXT: ld a2, %lo(.LCPI4_0)(a2)
+; RV64-NEXT: add a3, a0, a1
+; RV64-NEXT: sltu a4, a3, a0
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: mulhu a2, a3, a2
+; RV64-NEXT: srli a2, a2, 3
+; RV64-NEXT: slli a4, a2, 4
+; RV64-NEXT: sub a2, a2, a4
+; RV64-NEXT: lui a4, %hi(.LCPI4_1)
+; RV64-NEXT: ld a4, %lo(.LCPI4_1)(a4)
+; RV64-NEXT: lui a5, %hi(.LCPI4_2)
+; RV64-NEXT: ld a5, %lo(.LCPI4_2)(a5)
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: sub a3, a0, a2
+; RV64-NEXT: mul a4, a3, a4
+; RV64-NEXT: mulhu a6, a3, a5
+; RV64-NEXT: add a4, a6, a4
+; RV64-NEXT: sltu a0, a0, a2
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: mul a0, a0, a5
+; RV64-NEXT: add a1, a4, a0
+; RV64-NEXT: mul a0, a3, a5
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 15
ret iXLen2 %a
define iXLen2 @test_udiv_17(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_17:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 17
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a2, a0, a1
+; RV32-NEXT: sltu a3, a2, a0
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: lui a3, 986895
+; RV32-NEXT: addi a4, a3, 241
+; RV32-NEXT: mulhu a5, a2, a4
+; RV32-NEXT: srli a6, a5, 4
+; RV32-NEXT: andi a5, a5, -16
+; RV32-NEXT: add a5, a5, a6
+; RV32-NEXT: sub a2, a2, a5
+; RV32-NEXT: sub a5, a0, a2
+; RV32-NEXT: addi a3, a3, 240
+; RV32-NEXT: mul a3, a5, a3
+; RV32-NEXT: mulhu a6, a5, a4
+; RV32-NEXT: add a3, a6, a3
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: mul a0, a0, a4
+; RV32-NEXT: add a1, a3, a0
+; RV32-NEXT: mul a0, a5, a4
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_17:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 17
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __udivti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI5_0)
+; RV64-NEXT: ld a2, %lo(.LCPI5_0)(a2)
+; RV64-NEXT: add a3, a0, a1
+; RV64-NEXT: sltu a4, a3, a0
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: mulhu a4, a3, a2
+; RV64-NEXT: srli a5, a4, 4
+; RV64-NEXT: andi a4, a4, -16
+; RV64-NEXT: lui a6, %hi(.LCPI5_1)
+; RV64-NEXT: ld a6, %lo(.LCPI5_1)(a6)
+; RV64-NEXT: add a4, a4, a5
+; RV64-NEXT: sub a3, a3, a4
+; RV64-NEXT: sub a4, a0, a3
+; RV64-NEXT: mul a5, a4, a6
+; RV64-NEXT: mulhu a6, a4, a2
+; RV64-NEXT: add a5, a6, a5
+; RV64-NEXT: sltu a0, a0, a3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: mul a0, a0, a2
+; RV64-NEXT: add a1, a5, a0
+; RV64-NEXT: mul a0, a4, a2
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 17
ret iXLen2 %a
define iXLen2 @test_udiv_255(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_255:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 255
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a2, a0, a1
+; RV32-NEXT: sltu a3, a2, a0
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: lui a3, 526344
+; RV32-NEXT: addi a3, a3, 129
+; RV32-NEXT: mulhu a3, a2, a3
+; RV32-NEXT: srli a3, a3, 7
+; RV32-NEXT: slli a4, a3, 8
+; RV32-NEXT: sub a3, a3, a4
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: sub a3, a0, a2
+; RV32-NEXT: lui a4, 1044464
+; RV32-NEXT: addi a5, a4, -258
+; RV32-NEXT: mul a5, a3, a5
+; RV32-NEXT: addi a4, a4, -257
+; RV32-NEXT: mulhu a6, a3, a4
+; RV32-NEXT: add a5, a6, a5
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: mul a0, a0, a4
+; RV32-NEXT: add a1, a5, a0
+; RV32-NEXT: mul a0, a3, a4
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_255:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 255
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __udivti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI6_0)
+; RV64-NEXT: ld a2, %lo(.LCPI6_0)(a2)
+; RV64-NEXT: add a3, a0, a1
+; RV64-NEXT: sltu a4, a3, a0
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: mulhu a2, a3, a2
+; RV64-NEXT: srli a2, a2, 7
+; RV64-NEXT: slli a4, a2, 8
+; RV64-NEXT: sub a2, a2, a4
+; RV64-NEXT: lui a4, %hi(.LCPI6_1)
+; RV64-NEXT: ld a4, %lo(.LCPI6_1)(a4)
+; RV64-NEXT: lui a5, %hi(.LCPI6_2)
+; RV64-NEXT: ld a5, %lo(.LCPI6_2)(a5)
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: sub a3, a0, a2
+; RV64-NEXT: mul a4, a3, a4
+; RV64-NEXT: mulhu a6, a3, a5
+; RV64-NEXT: add a4, a6, a4
+; RV64-NEXT: sltu a0, a0, a2
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: mul a0, a0, a5
+; RV64-NEXT: add a1, a4, a0
+; RV64-NEXT: mul a0, a3, a5
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 255
ret iXLen2 %a
define iXLen2 @test_udiv_257(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_257:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 257
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a2, a0, a1
+; RV32-NEXT: sltu a3, a2, a0
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: lui a3, 1044496
+; RV32-NEXT: addi a4, a3, -255
+; RV32-NEXT: mulhu a5, a2, a4
+; RV32-NEXT: srli a6, a5, 8
+; RV32-NEXT: andi a5, a5, -256
+; RV32-NEXT: add a5, a5, a6
+; RV32-NEXT: sub a2, a2, a5
+; RV32-NEXT: sub a5, a0, a2
+; RV32-NEXT: addi a3, a3, -256
+; RV32-NEXT: mul a3, a5, a3
+; RV32-NEXT: mulhu a6, a5, a4
+; RV32-NEXT: add a3, a6, a3
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: mul a0, a0, a4
+; RV32-NEXT: add a1, a3, a0
+; RV32-NEXT: mul a0, a5, a4
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_257:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 257
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __udivti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI7_0)
+; RV64-NEXT: ld a2, %lo(.LCPI7_0)(a2)
+; RV64-NEXT: add a3, a0, a1
+; RV64-NEXT: sltu a4, a3, a0
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: mulhu a4, a3, a2
+; RV64-NEXT: srli a5, a4, 8
+; RV64-NEXT: andi a4, a4, -256
+; RV64-NEXT: lui a6, %hi(.LCPI7_1)
+; RV64-NEXT: ld a6, %lo(.LCPI7_1)(a6)
+; RV64-NEXT: add a4, a4, a5
+; RV64-NEXT: sub a3, a3, a4
+; RV64-NEXT: sub a4, a0, a3
+; RV64-NEXT: mul a5, a4, a6
+; RV64-NEXT: mulhu a6, a4, a2
+; RV64-NEXT: add a5, a6, a5
+; RV64-NEXT: sltu a0, a0, a3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: mul a0, a0, a2
+; RV64-NEXT: add a1, a5, a0
+; RV64-NEXT: mul a0, a4, a2
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 257
ret iXLen2 %a
define iXLen2 @test_udiv_65535(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_65535:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: lui a2, 16
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a2, a0, a1
+; RV32-NEXT: sltu a3, a2, a0
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: lui a3, 524296
+; RV32-NEXT: addi a3, a3, 1
+; RV32-NEXT: mulhu a3, a2, a3
+; RV32-NEXT: srli a3, a3, 15
+; RV32-NEXT: slli a4, a3, 16
+; RV32-NEXT: sub a3, a3, a4
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: sub a3, a0, a2
+; RV32-NEXT: lui a4, 1048560
+; RV32-NEXT: addi a5, a4, -2
+; RV32-NEXT: mul a5, a3, a5
+; RV32-NEXT: addi a4, a4, -1
+; RV32-NEXT: mulhu a4, a3, a4
+; RV32-NEXT: add a4, a4, a5
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: slli a1, a0, 16
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sub a1, a4, a0
+; RV32-NEXT: slli a0, a3, 16
+; RV32-NEXT: add a0, a0, a3
+; RV32-NEXT: neg a0, a0
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_65535:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: lui a2, 16
-; RV64-NEXT: addiw a2, a2, -1
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __udivti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI8_0)
+; RV64-NEXT: ld a2, %lo(.LCPI8_0)(a2)
+; RV64-NEXT: add a3, a0, a1
+; RV64-NEXT: sltu a4, a3, a0
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: mulhu a2, a3, a2
+; RV64-NEXT: srli a2, a2, 15
+; RV64-NEXT: slli a4, a2, 16
+; RV64-NEXT: sub a2, a2, a4
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: sub a3, a0, a2
+; RV64-NEXT: lui a4, 983039
+; RV64-NEXT: slli a4, a4, 4
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: slli a4, a4, 16
+; RV64-NEXT: addi a5, a4, -2
+; RV64-NEXT: mul a5, a3, a5
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: mulhu a6, a3, a4
+; RV64-NEXT: add a5, a6, a5
+; RV64-NEXT: sltu a0, a0, a2
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: mul a0, a0, a4
+; RV64-NEXT: add a1, a5, a0
+; RV64-NEXT: mul a0, a3, a4
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 65535
ret iXLen2 %a
define iXLen2 @test_udiv_65537(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_65537:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: lui a2, 16
-; RV32-NEXT: addi a2, a2, 1
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __udivdi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a2, a0, a1
+; RV32-NEXT: sltu a3, a2, a0
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: lui a3, 1048560
+; RV32-NEXT: addi a4, a3, 1
+; RV32-NEXT: mulhu a5, a2, a4
+; RV32-NEXT: and a3, a5, a3
+; RV32-NEXT: srli a5, a5, 16
+; RV32-NEXT: or a3, a3, a5
+; RV32-NEXT: sub a2, a2, a3
+; RV32-NEXT: sub a3, a0, a2
+; RV32-NEXT: mulhu a4, a3, a4
+; RV32-NEXT: slli a5, a3, 16
+; RV32-NEXT: sub a4, a4, a5
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: slli a1, a0, 16
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: add a1, a4, a0
+; RV32-NEXT: sub a0, a3, a5
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_65537:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: lui a2, 16
-; RV64-NEXT: addiw a2, a2, 1
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __udivti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: add a2, a0, a1
+; RV64-NEXT: sltu a3, a2, a0
+; RV64-NEXT: add a2, a2, a3
+; RV64-NEXT: lui a3, 983041
+; RV64-NEXT: slli a3, a3, 4
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: slli a3, a3, 16
+; RV64-NEXT: addi a4, a3, 1
+; RV64-NEXT: mulhu a5, a2, a4
+; RV64-NEXT: lui a6, 1048560
+; RV64-NEXT: and a6, a5, a6
+; RV64-NEXT: srli a5, a5, 16
+; RV64-NEXT: add a5, a6, a5
+; RV64-NEXT: sub a2, a2, a5
+; RV64-NEXT: sub a5, a0, a2
+; RV64-NEXT: mul a3, a5, a3
+; RV64-NEXT: mulhu a6, a5, a4
+; RV64-NEXT: add a3, a6, a3
+; RV64-NEXT: sltu a0, a0, a2
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: mul a0, a0, a4
+; RV64-NEXT: add a1, a3, a0
+; RV64-NEXT: mul a0, a5, a4
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 65537
ret iXLen2 %a
define iXLen2 @test_urem_3(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_3:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 3
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __umoddi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: lui a1, 699051
+; RV32-NEXT: addi a1, a1, -1365
+; RV32-NEXT: mulhu a1, a0, a1
+; RV32-NEXT: srli a2, a1, 1
+; RV32-NEXT: andi a1, a1, -2
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_3:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 3
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __umodti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI0_0)
+; RV64-NEXT: ld a2, %lo(.LCPI0_0)(a2)
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: sltu a0, a1, a0
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: mulhu a1, a0, a2
+; RV64-NEXT: srli a2, a1, 1
+; RV64-NEXT: andi a1, a1, -2
+; RV64-NEXT: add a1, a1, a2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 3
ret iXLen2 %a
define iXLen2 @test_urem_5(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_5:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 5
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __umoddi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: lui a1, 838861
+; RV32-NEXT: addi a1, a1, -819
+; RV32-NEXT: mulhu a1, a0, a1
+; RV32-NEXT: srli a2, a1, 2
+; RV32-NEXT: andi a1, a1, -4
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_5:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 5
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __umodti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI1_0)
+; RV64-NEXT: ld a2, %lo(.LCPI1_0)(a2)
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: sltu a0, a1, a0
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: mulhu a1, a0, a2
+; RV64-NEXT: srli a2, a1, 2
+; RV64-NEXT: andi a1, a1, -4
+; RV64-NEXT: add a1, a1, a2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 5
ret iXLen2 %a
define iXLen2 @test_urem_15(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_15:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 15
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __umoddi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: lui a1, 559241
+; RV32-NEXT: addi a1, a1, -1911
+; RV32-NEXT: mulhu a1, a0, a1
+; RV32-NEXT: srli a1, a1, 3
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: sub a1, a1, a2
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_15:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 15
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __umodti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI4_0)
+; RV64-NEXT: ld a2, %lo(.LCPI4_0)(a2)
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: sltu a0, a1, a0
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: mulhu a1, a0, a2
+; RV64-NEXT: srli a1, a1, 3
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: sub a1, a1, a2
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 15
ret iXLen2 %a
define iXLen2 @test_urem_17(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_17:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 17
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __umoddi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: lui a1, 986895
+; RV32-NEXT: addi a1, a1, 241
+; RV32-NEXT: mulhu a1, a0, a1
+; RV32-NEXT: srli a2, a1, 4
+; RV32-NEXT: andi a1, a1, -16
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_17:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 17
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __umodti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI5_0)
+; RV64-NEXT: ld a2, %lo(.LCPI5_0)(a2)
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: sltu a0, a1, a0
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: mulhu a1, a0, a2
+; RV64-NEXT: srli a2, a1, 4
+; RV64-NEXT: andi a1, a1, -16
+; RV64-NEXT: add a1, a1, a2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 17
ret iXLen2 %a
define iXLen2 @test_urem_255(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_255:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 255
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __umoddi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: lui a1, 526344
+; RV32-NEXT: addi a1, a1, 129
+; RV32-NEXT: mulhu a1, a0, a1
+; RV32-NEXT: srli a1, a1, 7
+; RV32-NEXT: slli a2, a1, 8
+; RV32-NEXT: sub a1, a1, a2
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_255:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 255
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __umodti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI6_0)
+; RV64-NEXT: ld a2, %lo(.LCPI6_0)(a2)
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: sltu a0, a1, a0
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: mulhu a1, a0, a2
+; RV64-NEXT: srli a1, a1, 7
+; RV64-NEXT: slli a2, a1, 8
+; RV64-NEXT: sub a1, a1, a2
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 255
ret iXLen2 %a
define iXLen2 @test_urem_257(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_257:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a2, 257
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __umoddi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: lui a1, 1044496
+; RV32-NEXT: addi a1, a1, -255
+; RV32-NEXT: mulhu a1, a0, a1
+; RV32-NEXT: srli a2, a1, 8
+; RV32-NEXT: andi a1, a1, -256
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_257:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a2, 257
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __umodti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI7_0)
+; RV64-NEXT: ld a2, %lo(.LCPI7_0)(a2)
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: sltu a0, a1, a0
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: mulhu a1, a0, a2
+; RV64-NEXT: srli a2, a1, 8
+; RV64-NEXT: andi a1, a1, -256
+; RV64-NEXT: add a1, a1, a2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 257
ret iXLen2 %a
define iXLen2 @test_urem_65535(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_65535:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: lui a2, 16
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __umoddi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: lui a1, 524296
+; RV32-NEXT: addi a1, a1, 1
+; RV32-NEXT: mulhu a1, a0, a1
+; RV32-NEXT: srli a1, a1, 15
+; RV32-NEXT: slli a2, a1, 16
+; RV32-NEXT: sub a1, a1, a2
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_65535:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: lui a2, 16
-; RV64-NEXT: addiw a2, a2, -1
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __umodti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a2, %hi(.LCPI8_0)
+; RV64-NEXT: ld a2, %lo(.LCPI8_0)(a2)
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: sltu a0, a1, a0
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: mulhu a1, a0, a2
+; RV64-NEXT: srli a1, a1, 15
+; RV64-NEXT: slli a2, a1, 16
+; RV64-NEXT: sub a1, a1, a2
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 65535
ret iXLen2 %a
define iXLen2 @test_urem_65537(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_65537:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: lui a2, 16
-; RV32-NEXT: addi a2, a2, 1
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: call __umoddi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: lui a1, 1048560
+; RV32-NEXT: addi a2, a1, 1
+; RV32-NEXT: mulhu a2, a0, a2
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: srli a2, a2, 16
+; RV32-NEXT: or a1, a1, a2
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_65537:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: lui a2, 16
-; RV64-NEXT: addiw a2, a2, 1
-; RV64-NEXT: li a3, 0
-; RV64-NEXT: call __umodti3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: sltu a0, a1, a0
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: lui a1, 983041
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: slli a1, a1, 16
+; RV64-NEXT: addi a1, a1, 1
+; RV64-NEXT: mulhu a1, a0, a1
+; RV64-NEXT: lui a2, 1048560
+; RV64-NEXT: and a2, a1, a2
+; RV64-NEXT: srli a1, a1, 16
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 65537
ret iXLen2 %a
; CHECK-NEXT: lea %s2, __umodti3@lo
; CHECK-NEXT: and %s2, %s2, (32)0
; CHECK-NEXT: lea.sl %s12, __umodti3@hi(, %s2)
-; CHECK-NEXT: or %s2, 3, (0)1
+; CHECK-NEXT: or %s2, 11, (0)1
; CHECK-NEXT: or %s3, 0, (0)1
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s11, 0, %s9
- %r = urem i128 %a, 3
+ %r = urem i128 %a, 11
ret i128 %r
}
define i64 @urem_i64_3(i64 %x) nounwind {
; X32-LABEL: urem_i64_3:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $3
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __umoddi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: shrl %edx
+; X32-NEXT: leal (%edx,%edx,2), %eax
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: urem_i64_3:
define i64 @urem_i64_5(i64 %x) nounwind {
; X32-LABEL: urem_i64_5:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $5
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __umoddi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: shrl $2, %edx
+; X32-NEXT: leal (%edx,%edx,4), %eax
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: urem_i64_5:
define i64 @urem_i64_15(i64 %x) nounwind {
; X32-LABEL: urem_i64_15:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $15
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __umoddi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl $-2004318071, %edx # imm = 0x88888889
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: shrl $3, %edx
+; X32-NEXT: leal (%edx,%edx,4), %eax
+; X32-NEXT: leal (%eax,%eax,2), %eax
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: urem_i64_15:
define i64 @urem_i64_17(i64 %x) nounwind {
; X32-LABEL: urem_i64_17:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $17
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __umoddi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl $-252645135, %edx # imm = 0xF0F0F0F1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: andl $-16, %eax
+; X32-NEXT: shrl $4, %edx
+; X32-NEXT: addl %eax, %edx
+; X32-NEXT: subl %edx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: urem_i64_17:
define i64 @urem_i64_255(i64 %x) nounwind {
; X32-LABEL: urem_i64_255:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $255
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __umoddi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: movl $-2139062143, %edx # imm = 0x80808081
+; X32-NEXT: mull %edx
+; X32-NEXT: shrl $7, %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: shll $8, %eax
+; X32-NEXT: subl %eax, %edx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: urem_i64_255:
define i64 @urem_i64_257(i64 %x) nounwind {
; X32-LABEL: urem_i64_257:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $257 # imm = 0x101
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __umoddi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl $-16711935, %edx # imm = 0xFF00FF01
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: andl $-256, %eax
+; X32-NEXT: shrl $8, %edx
+; X32-NEXT: addl %eax, %edx
+; X32-NEXT: subl %edx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: urem_i64_257:
define i64 @urem_i64_65535(i64 %x) nounwind {
; X32-LABEL: urem_i64_65535:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $65535 # imm = 0xFFFF
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __umoddi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: movl $-2147450879, %edx # imm = 0x80008001
+; X32-NEXT: mull %edx
+; X32-NEXT: shrl $15, %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: shll $16, %eax
+; X32-NEXT: subl %eax, %edx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: urem_i64_65535:
define i64 @urem_i64_65537(i64 %x) nounwind {
; X32-LABEL: urem_i64_65537:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $65537 # imm = 0x10001
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __umoddi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl $-65535, %edx # imm = 0xFFFF0001
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: shrl $16, %eax
+; X32-NEXT: shldl $16, %edx, %eax
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
;
; X64-LABEL: urem_i64_65537:
define i64 @udiv_i64_3(i64 %x) nounwind {
; X32-LABEL: udiv_i64_3:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $3
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __udivdi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl $-1431655765, %ebx # imm = 0xAAAAAAAB
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: shrl %edx
+; X32-NEXT: leal (%edx,%edx,2), %eax
+; X32-NEXT: subl %eax, %esi
+; X32-NEXT: subl %esi, %ecx
+; X32-NEXT: sbbl $0, %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-1431655765, %edi, %ecx # imm = 0xAAAAAAAB
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: udiv_i64_3:
define i64 @udiv_i64_5(i64 %x) nounwind {
; X32-LABEL: udiv_i64_5:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $5
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __udivdi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl $-858993459, %ebx # imm = 0xCCCCCCCD
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: shrl $2, %edx
+; X32-NEXT: leal (%edx,%edx,4), %eax
+; X32-NEXT: subl %eax, %esi
+; X32-NEXT: subl %esi, %ecx
+; X32-NEXT: sbbl $0, %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: imull $-858993460, %ecx, %ecx # imm = 0xCCCCCCCC
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-858993459, %edi, %ecx # imm = 0xCCCCCCCD
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: udiv_i64_5:
define i64 @udiv_i64_15(i64 %x) nounwind {
; X32-LABEL: udiv_i64_15:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $15
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __udivdi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl $-2004318071, %edx # imm = 0x88888889
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: shrl $3, %edx
+; X32-NEXT: leal (%edx,%edx,4), %eax
+; X32-NEXT: leal (%eax,%eax,2), %eax
+; X32-NEXT: subl %eax, %esi
+; X32-NEXT: subl %esi, %ecx
+; X32-NEXT: sbbl $0, %edi
+; X32-NEXT: movl $-286331153, %edx # imm = 0xEEEEEEEF
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: imull $-286331154, %ecx, %ecx # imm = 0xEEEEEEEE
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-286331153, %edi, %ecx # imm = 0xEEEEEEEF
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
; X32-NEXT: retl
;
; X64-LABEL: udiv_i64_15:
define i64 @udiv_i64_17(i64 %x) nounwind {
; X32-LABEL: udiv_i64_17:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $17
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __udivdi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl $-252645135, %ebx # imm = 0xF0F0F0F1
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: andl $-16, %eax
+; X32-NEXT: shrl $4, %edx
+; X32-NEXT: addl %eax, %edx
+; X32-NEXT: subl %edx, %esi
+; X32-NEXT: subl %esi, %ecx
+; X32-NEXT: sbbl $0, %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: imull $-252645136, %ecx, %ecx # imm = 0xF0F0F0F0
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-252645135, %edi, %ecx # imm = 0xF0F0F0F1
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: udiv_i64_17:
define i64 @udiv_i64_255(i64 %x) nounwind {
; X32-LABEL: udiv_i64_255:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $255
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __udivdi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: movl $-2139062143, %edx # imm = 0x80808081
+; X32-NEXT: mull %edx
+; X32-NEXT: shrl $7, %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: shll $8, %eax
+; X32-NEXT: subl %eax, %edx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: sbbl $0, %esi
+; X32-NEXT: movl $-16843009, %edx # imm = 0xFEFEFEFF
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: imull $-16843010, %ecx, %ecx # imm = 0xFEFEFEFE
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-16843009, %esi, %ecx # imm = 0xFEFEFEFF
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: udiv_i64_255:
define i64 @udiv_i64_257(i64 %x) nounwind {
; X32-LABEL: udiv_i64_257:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $257 # imm = 0x101
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __udivdi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl $-16711935, %ebx # imm = 0xFF00FF01
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: andl $-256, %eax
+; X32-NEXT: shrl $8, %edx
+; X32-NEXT: addl %eax, %edx
+; X32-NEXT: subl %edx, %esi
+; X32-NEXT: subl %esi, %ecx
+; X32-NEXT: sbbl $0, %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: imull $-16711936, %ecx, %ecx # imm = 0xFF00FF00
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-16711935, %edi, %ecx # imm = 0xFF00FF01
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: udiv_i64_257:
define i64 @udiv_i64_65535(i64 %x) nounwind {
; X32-LABEL: udiv_i64_65535:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $65535 # imm = 0xFFFF
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __udivdi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: movl $-2147450879, %edx # imm = 0x80008001
+; X32-NEXT: mull %edx
+; X32-NEXT: shrl $15, %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: shll $16, %eax
+; X32-NEXT: subl %eax, %edx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: sbbl $0, %esi
+; X32-NEXT: movl $-65537, %edx # imm = 0xFFFEFFFF
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: imull $-65538, %ecx, %ecx # imm = 0xFFFEFFFE
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: shll $16, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: subl %ecx, %edx
+; X32-NEXT: popl %esi
; X32-NEXT: retl
;
; X64-LABEL: udiv_i64_65535:
define i64 @udiv_i64_65537(i64 %x) nounwind {
; X32-LABEL: udiv_i64_65537:
; X32: # %bb.0: # %entry
-; X32-NEXT: subl $12, %esp
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $65537 # imm = 0x10001
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll __udivdi3
-; X32-NEXT: addl $28, %esp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl $-65535, %ebx # imm = 0xFFFF0001
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: shrl $16, %eax
+; X32-NEXT: shldl $16, %edx, %eax
+; X32-NEXT: subl %eax, %esi
+; X32-NEXT: subl %esi, %ecx
+; X32-NEXT: sbbl $0, %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: shll $16, %ecx
+; X32-NEXT: subl %ecx, %edx
+; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: shll $16, %ecx
+; X32-NEXT: subl %ecx, %edi
+; X32-NEXT: addl %edi, %edx
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: udiv_i64_65537:
ret i64 %rem
}
+; Make sure we don't inline expand for optsize.
define i64 @urem_i64_3_optsize(i64 %x) nounwind optsize {
; X32-LABEL: urem_i64_3_optsize:
; X32: # %bb.0: # %entry
define i64 @udiv128(i128 %x) nounwind {
; X86-64-LABEL: udiv128:
; X86-64: # %bb.0:
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $3, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __udivti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: addq %rdi, %rsi
+; X86-64-NEXT: adcq $0, %rsi
+; X86-64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
+; X86-64-NEXT: movq %rsi, %rax
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: shrq %rdx
+; X86-64-NEXT: leaq (%rdx,%rdx,2), %rax
+; X86-64-NEXT: subq %rsi, %rax
+; X86-64-NEXT: addq %rdi, %rax
+; X86-64-NEXT: imulq %rcx, %rax
; X86-64-NEXT: retq
;
; WIN64-LABEL: udiv128:
; WIN64: # %bb.0:
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $3, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __udivti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: addq %rcx, %r8
+; WIN64-NEXT: adcq $0, %r8
+; WIN64-NEXT: movabsq $-6148914691236517205, %r9 # imm = 0xAAAAAAAAAAAAAAAB
+; WIN64-NEXT: movq %r8, %rax
+; WIN64-NEXT: mulq %r9
+; WIN64-NEXT: shrq %rdx
+; WIN64-NEXT: leaq (%rdx,%rdx,2), %rax
+; WIN64-NEXT: subq %r8, %rax
+; WIN64-NEXT: addq %rcx, %rax
+; WIN64-NEXT: imulq %r9, %rax
; WIN64-NEXT: retq
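+; 2^64 % 3 == 1, so x % 3 == (lo + hi + carry) % 3 and the i128 urem
+; reduces to a 64-bit remainder of the digit sum. For example, with
+; x = 2^64 + 2 the sum is 1 + 2 == 3 and x % 3 == 0. No __umodti3 call
+; should remain below.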
define i128 @urem_i128_3(i128 %x) nounwind {
; X86-64-LABEL: urem_i128_3:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $3, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __umodti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: addq %rsi, %rdi
+; X86-64-NEXT: adcq $0, %rdi
+; X86-64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: shrq %rdx
+; X86-64-NEXT: leaq (%rdx,%rdx,2), %rax
+; X86-64-NEXT: subq %rax, %rdi
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: xorl %edx, %edx
; X86-64-NEXT: retq
;
; WIN64-LABEL: urem_i128_3:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $3, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __umodti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-6148914691236517205, %rdx # imm = 0xAAAAAAAAAAAAAAAB
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: shrq %rdx
+; WIN64-NEXT: leaq (%rdx,%rdx,2), %rax
+; WIN64-NEXT: subq %rax, %rcx
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: xorl %edx, %edx
; WIN64-NEXT: retq
entry:
%rem = urem i128 %x, 3
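+; Same digit-sum reduction for 5, since 2^64 % 5 == 1.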
define i128 @urem_i128_5(i128 %x) nounwind {
; X86-64-LABEL: urem_i128_5:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $5, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __umodti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: addq %rsi, %rdi
+; X86-64-NEXT: adcq $0, %rdi
+; X86-64-NEXT: movabsq $-3689348814741910323, %rcx # imm = 0xCCCCCCCCCCCCCCCD
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: shrq $2, %rdx
+; X86-64-NEXT: leaq (%rdx,%rdx,4), %rax
+; X86-64-NEXT: subq %rax, %rdi
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: xorl %edx, %edx
; X86-64-NEXT: retq
;
; WIN64-LABEL: urem_i128_5:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $5, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __umodti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-3689348814741910323, %rdx # imm = 0xCCCCCCCCCCCCCCCD
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: shrq $2, %rdx
+; WIN64-NEXT: leaq (%rdx,%rdx,4), %rax
+; WIN64-NEXT: subq %rax, %rcx
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: xorl %edx, %edx
; WIN64-NEXT: retq
entry:
%rem = urem i128 %x, 5
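+; 2^64 % 15 == 1. The q*15 term is built with two leaq's (q*5, then *3).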
define i128 @urem_i128_15(i128 %x) nounwind {
; X86-64-LABEL: urem_i128_15:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $15, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __umodti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: addq %rsi, %rdi
+; X86-64-NEXT: adcq $0, %rdi
+; X86-64-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: shrq $3, %rdx
+; X86-64-NEXT: leaq (%rdx,%rdx,4), %rax
+; X86-64-NEXT: leaq (%rax,%rax,2), %rax
+; X86-64-NEXT: subq %rax, %rdi
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: xorl %edx, %edx
; X86-64-NEXT: retq
;
; WIN64-LABEL: urem_i128_15:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $15, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __umodti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: shrq $3, %rdx
+; WIN64-NEXT: leaq (%rdx,%rdx,4), %rax
+; WIN64-NEXT: leaq (%rax,%rax,2), %rax
+; WIN64-NEXT: subq %rax, %rcx
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: xorl %edx, %edx
; WIN64-NEXT: retq
entry:
%rem = urem i128 %x, 15
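+; 2^64 % 17 == 1. q*17 is formed as (q << 4) + q via the andq/shrq pair.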
define i128 @urem_i128_17(i128 %x) nounwind {
; X86-64-LABEL: urem_i128_17:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $17, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __umodti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: addq %rsi, %rdi
+; X86-64-NEXT: adcq $0, %rdi
+; X86-64-NEXT: movabsq $-1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F1
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: movq %rdx, %rax
+; X86-64-NEXT: andq $-16, %rax
+; X86-64-NEXT: shrq $4, %rdx
+; X86-64-NEXT: addq %rax, %rdx
+; X86-64-NEXT: subq %rdx, %rdi
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: xorl %edx, %edx
; X86-64-NEXT: retq
;
; WIN64-LABEL: urem_i128_17:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $17, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __umodti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F1
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: andq $-16, %rax
+; WIN64-NEXT: shrq $4, %rdx
+; WIN64-NEXT: addq %rax, %rdx
+; WIN64-NEXT: subq %rdx, %rcx
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: xorl %edx, %edx
; WIN64-NEXT: retq
entry:
%rem = urem i128 %x, 17
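+; 2^64 % 255 == 1. Here the remainder is refolded as lo + hi - 255*q, with
+; the end-around carry handled by the adcq.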
define i128 @urem_i128_255(i128 %x) nounwind {
; X86-64-LABEL: urem_i128_255:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $255, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __umodti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: addq %rsi, %rax
+; X86-64-NEXT: adcq $0, %rax
+; X86-64-NEXT: movabsq $-9187201950435737471, %rcx # imm = 0x8080808080808081
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: shrq $7, %rdx
+; X86-64-NEXT: movq %rdx, %rax
+; X86-64-NEXT: shlq $8, %rax
+; X86-64-NEXT: subq %rax, %rdx
+; X86-64-NEXT: addq %rsi, %rdi
+; X86-64-NEXT: adcq %rdx, %rdi
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: xorl %edx, %edx
; X86-64-NEXT: retq
;
; WIN64-LABEL: urem_i128_255:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $255, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __umodti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: addq %rdx, %rax
+; WIN64-NEXT: adcq $0, %rax
+; WIN64-NEXT: movabsq $-9187201950435737471, %rdx # imm = 0x8080808080808081
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: shrq $7, %rdx
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: shlq $8, %rax
+; WIN64-NEXT: subq %rax, %rdx
+; WIN64-NEXT: addq %rcx, %r8
+; WIN64-NEXT: adcq %rdx, %r8
+; WIN64-NEXT: movq %r8, %rax
+; WIN64-NEXT: xorl %edx, %edx
; WIN64-NEXT: retq
entry:
%rem = urem i128 %x, 255
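+; 2^64 % 257 == 1. q*257 is formed as (q << 8) + q, mirroring the 17 case.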
define i128 @urem_i128_257(i128 %x) nounwind {
; X86-64-LABEL: urem_i128_257:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $257, %edx # imm = 0x101
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __umodti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: addq %rsi, %rdi
+; X86-64-NEXT: adcq $0, %rdi
+; X86-64-NEXT: movabsq $-71777214294589695, %rcx # imm = 0xFF00FF00FF00FF01
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: movq %rdx, %rax
+; X86-64-NEXT: andq $-256, %rax
+; X86-64-NEXT: shrq $8, %rdx
+; X86-64-NEXT: addq %rax, %rdx
+; X86-64-NEXT: subq %rdx, %rdi
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: xorl %edx, %edx
; X86-64-NEXT: retq
;
; WIN64-LABEL: urem_i128_257:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $257, {{[0-9]+}}(%rsp) # imm = 0x101
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __umodti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-71777214294589695, %rdx # imm = 0xFF00FF00FF00FF01
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: andq $-256, %rax
+; WIN64-NEXT: shrq $8, %rdx
+; WIN64-NEXT: addq %rax, %rdx
+; WIN64-NEXT: subq %rdx, %rcx
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: xorl %edx, %edx
; WIN64-NEXT: retq
entry:
%rem = urem i128 %x, 257
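+; 2^64 % 65535 == 1. Uses the same adcq refold as the 255 case.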
define i128 @urem_i128_65535(i128 %x) nounwind {
; X86-64-LABEL: urem_i128_65535:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $65535, %edx # imm = 0xFFFF
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __umodti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: addq %rsi, %rax
+; X86-64-NEXT: adcq $0, %rax
+; X86-64-NEXT: movabsq $-9223231297218904063, %rcx # imm = 0x8000800080008001
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: shrq $15, %rdx
+; X86-64-NEXT: movq %rdx, %rax
+; X86-64-NEXT: shlq $16, %rax
+; X86-64-NEXT: subq %rax, %rdx
+; X86-64-NEXT: addq %rsi, %rdi
+; X86-64-NEXT: adcq %rdx, %rdi
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: xorl %edx, %edx
; X86-64-NEXT: retq
;
; WIN64-LABEL: urem_i128_65535:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $65535, {{[0-9]+}}(%rsp) # imm = 0xFFFF
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __umodti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: addq %rdx, %rax
+; WIN64-NEXT: adcq $0, %rax
+; WIN64-NEXT: movabsq $-9223231297218904063, %rdx # imm = 0x8000800080008001
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: shrq $15, %rdx
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: shlq $16, %rax
+; WIN64-NEXT: subq %rax, %rdx
+; WIN64-NEXT: addq %rcx, %r8
+; WIN64-NEXT: adcq %rdx, %r8
+; WIN64-NEXT: movq %r8, %rax
+; WIN64-NEXT: xorl %edx, %edx
; WIN64-NEXT: retq
entry:
%rem = urem i128 %x, 65535
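+; 2^64 % 65537 == 1. q*65537 is formed as (q << 16) + q.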
define i128 @urem_i128_65537(i128 %x) nounwind {
; X86-64-LABEL: urem_i128_65537:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $65537, %edx # imm = 0x10001
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __umodti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: addq %rsi, %rdi
+; X86-64-NEXT: adcq $0, %rdi
+; X86-64-NEXT: movabsq $-281470681808895, %rcx # imm = 0xFFFF0000FFFF0001
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: movq %rdx, %rax
+; X86-64-NEXT: andq $-65536, %rax # imm = 0xFFFF0000
+; X86-64-NEXT: shrq $16, %rdx
+; X86-64-NEXT: addq %rax, %rdx
+; X86-64-NEXT: subq %rdx, %rdi
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: xorl %edx, %edx
; X86-64-NEXT: retq
;
; WIN64-LABEL: urem_i128_65537:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $65537, {{[0-9]+}}(%rsp) # imm = 0x10001
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __umodti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-281470681808895, %rdx # imm = 0xFFFF0000FFFF0001
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: andq $-65536, %rax # imm = 0xFFFF0000
+; WIN64-NEXT: shrq $16, %rdx
+; WIN64-NEXT: addq %rax, %rdx
+; WIN64-NEXT: subq %rdx, %rcx
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: xorl %edx, %edx
; WIN64-NEXT: retq
entry:
%rem = urem i128 %x, 65537
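+; The udiv tests below compute the remainder as in the urem tests, subtract
+; it from x, and multiply by the multiplicative inverse of the divisor mod
+; 2^128 (for 3 the inverse is 0x...AAAB with high word 0x...AAAA, visible in
+; the mulq/imulq constants). For example, x = 9 gives rem == 0 and
+; (9 - 0) * inv(3) mod 2^128 == 3.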
define i128 @udiv_i128_3(i128 %x) nounwind {
; X86-64-LABEL: udiv_i128_3:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $3, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __udivti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: movq %rdi, %rcx
+; X86-64-NEXT: addq %rsi, %rcx
+; X86-64-NEXT: adcq $0, %rcx
+; X86-64-NEXT: movabsq $-6148914691236517205, %r8 # imm = 0xAAAAAAAAAAAAAAAB
+; X86-64-NEXT: movq %rcx, %rax
+; X86-64-NEXT: mulq %r8
+; X86-64-NEXT: shrq %rdx
+; X86-64-NEXT: leaq (%rdx,%rdx,2), %rax
+; X86-64-NEXT: subq %rax, %rcx
+; X86-64-NEXT: subq %rcx, %rdi
+; X86-64-NEXT: sbbq $0, %rsi
+; X86-64-NEXT: movabsq $-6148914691236517206, %rcx # imm = 0xAAAAAAAAAAAAAAAA
+; X86-64-NEXT: imulq %rdi, %rcx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %r8
+; X86-64-NEXT: addq %rcx, %rdx
+; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
; WIN64-LABEL: udiv_i128_3:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $3, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __udivti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: movq %rcx, %r9
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-6148914691236517205, %r10 # imm = 0xAAAAAAAAAAAAAAAB
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: shrq %rdx
+; WIN64-NEXT: leaq (%rdx,%rdx,2), %rax
+; WIN64-NEXT: subq %rax, %rcx
+; WIN64-NEXT: subq %rcx, %r9
+; WIN64-NEXT: sbbq $0, %r8
+; WIN64-NEXT: movabsq $-6148914691236517206, %rcx # imm = 0xAAAAAAAAAAAAAAAA
+; WIN64-NEXT: imulq %r9, %rcx
+; WIN64-NEXT: movq %r9, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: addq %rcx, %rdx
+; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
%rem = udiv i128 %x, 3
define i128 @udiv_i128_5(i128 %x) nounwind {
; X86-64-LABEL: udiv_i128_5:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $5, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __udivti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: movq %rdi, %rcx
+; X86-64-NEXT: addq %rsi, %rcx
+; X86-64-NEXT: adcq $0, %rcx
+; X86-64-NEXT: movabsq $-3689348814741910323, %r8 # imm = 0xCCCCCCCCCCCCCCCD
+; X86-64-NEXT: movq %rcx, %rax
+; X86-64-NEXT: mulq %r8
+; X86-64-NEXT: shrq $2, %rdx
+; X86-64-NEXT: leaq (%rdx,%rdx,4), %rax
+; X86-64-NEXT: subq %rax, %rcx
+; X86-64-NEXT: subq %rcx, %rdi
+; X86-64-NEXT: sbbq $0, %rsi
+; X86-64-NEXT: movabsq $-3689348814741910324, %rcx # imm = 0xCCCCCCCCCCCCCCCC
+; X86-64-NEXT: imulq %rdi, %rcx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %r8
+; X86-64-NEXT: addq %rcx, %rdx
+; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
; WIN64-LABEL: udiv_i128_5:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $5, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __udivti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: movq %rcx, %r9
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-3689348814741910323, %r10 # imm = 0xCCCCCCCCCCCCCCCD
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: shrq $2, %rdx
+; WIN64-NEXT: leaq (%rdx,%rdx,4), %rax
+; WIN64-NEXT: subq %rax, %rcx
+; WIN64-NEXT: subq %rcx, %r9
+; WIN64-NEXT: sbbq $0, %r8
+; WIN64-NEXT: movabsq $-3689348814741910324, %rcx # imm = 0xCCCCCCCCCCCCCCCC
+; WIN64-NEXT: imulq %r9, %rcx
+; WIN64-NEXT: movq %r9, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: addq %rcx, %rdx
+; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
%rem = udiv i128 %x, 5
define i128 @udiv_i128_15(i128 %x) nounwind {
; X86-64-LABEL: udiv_i128_15:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $15, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __udivti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: movq %rdi, %rcx
+; X86-64-NEXT: addq %rsi, %rcx
+; X86-64-NEXT: adcq $0, %rcx
+; X86-64-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
+; X86-64-NEXT: movq %rcx, %rax
+; X86-64-NEXT: mulq %rdx
+; X86-64-NEXT: shrq $3, %rdx
+; X86-64-NEXT: leaq (%rdx,%rdx,4), %rax
+; X86-64-NEXT: leaq (%rax,%rax,2), %rax
+; X86-64-NEXT: subq %rax, %rcx
+; X86-64-NEXT: subq %rcx, %rdi
+; X86-64-NEXT: sbbq $0, %rsi
+; X86-64-NEXT: movabsq $-1229782938247303442, %r8 # imm = 0xEEEEEEEEEEEEEEEE
+; X86-64-NEXT: imulq %rdi, %r8
+; X86-64-NEXT: movabsq $-1229782938247303441, %rcx # imm = 0xEEEEEEEEEEEEEEEF
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: addq %r8, %rdx
+; X86-64-NEXT: imulq %rsi, %rcx
+; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: retq
;
; WIN64-LABEL: udiv_i128_15:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $15, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __udivti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: movq %rcx, %r9
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: shrq $3, %rdx
+; WIN64-NEXT: leaq (%rdx,%rdx,4), %rax
+; WIN64-NEXT: leaq (%rax,%rax,2), %rax
+; WIN64-NEXT: subq %rax, %rcx
+; WIN64-NEXT: subq %rcx, %r9
+; WIN64-NEXT: sbbq $0, %r8
+; WIN64-NEXT: movabsq $-1229782938247303442, %rcx # imm = 0xEEEEEEEEEEEEEEEE
+; WIN64-NEXT: imulq %r9, %rcx
+; WIN64-NEXT: movabsq $-1229782938247303441, %r10 # imm = 0xEEEEEEEEEEEEEEEF
+; WIN64-NEXT: movq %r9, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: addq %rcx, %rdx
+; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
%rem = udiv i128 %x, 15
define i128 @udiv_i128_17(i128 %x) nounwind {
; X86-64-LABEL: udiv_i128_17:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $17, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __udivti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: movq %rdi, %rcx
+; X86-64-NEXT: addq %rsi, %rcx
+; X86-64-NEXT: adcq $0, %rcx
+; X86-64-NEXT: movabsq $-1085102592571150095, %r8 # imm = 0xF0F0F0F0F0F0F0F1
+; X86-64-NEXT: movq %rcx, %rax
+; X86-64-NEXT: mulq %r8
+; X86-64-NEXT: movq %rdx, %rax
+; X86-64-NEXT: andq $-16, %rax
+; X86-64-NEXT: shrq $4, %rdx
+; X86-64-NEXT: addq %rax, %rdx
+; X86-64-NEXT: subq %rdx, %rcx
+; X86-64-NEXT: subq %rcx, %rdi
+; X86-64-NEXT: sbbq $0, %rsi
+; X86-64-NEXT: movabsq $-1085102592571150096, %rcx # imm = 0xF0F0F0F0F0F0F0F0
+; X86-64-NEXT: imulq %rdi, %rcx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %r8
+; X86-64-NEXT: addq %rcx, %rdx
+; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
; WIN64-LABEL: udiv_i128_17:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $17, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __udivti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: movq %rcx, %r9
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-1085102592571150095, %r10 # imm = 0xF0F0F0F0F0F0F0F1
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: andq $-16, %rax
+; WIN64-NEXT: shrq $4, %rdx
+; WIN64-NEXT: addq %rax, %rdx
+; WIN64-NEXT: subq %rdx, %rcx
+; WIN64-NEXT: subq %rcx, %r9
+; WIN64-NEXT: sbbq $0, %r8
+; WIN64-NEXT: movabsq $-1085102592571150096, %rcx # imm = 0xF0F0F0F0F0F0F0F0
+; WIN64-NEXT: imulq %r9, %rcx
+; WIN64-NEXT: movq %r9, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: addq %rcx, %rdx
+; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
%rem = udiv i128 %x, 17
define i128 @udiv_i128_255(i128 %x) nounwind {
; X86-64-LABEL: udiv_i128_255:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $255, %edx
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __udivti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: addq %rsi, %rax
+; X86-64-NEXT: adcq $0, %rax
+; X86-64-NEXT: movabsq $-9187201950435737471, %rcx # imm = 0x8080808080808081
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: shrq $7, %rdx
+; X86-64-NEXT: movq %rdx, %rax
+; X86-64-NEXT: shlq $8, %rax
+; X86-64-NEXT: subq %rax, %rdx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: addq %rsi, %rax
+; X86-64-NEXT: adcq %rdx, %rax
+; X86-64-NEXT: subq %rax, %rdi
+; X86-64-NEXT: sbbq $0, %rsi
+; X86-64-NEXT: movabsq $-72340172838076674, %r8 # imm = 0xFEFEFEFEFEFEFEFE
+; X86-64-NEXT: imulq %rdi, %r8
+; X86-64-NEXT: movabsq $-72340172838076673, %rcx # imm = 0xFEFEFEFEFEFEFEFF
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: addq %r8, %rdx
+; X86-64-NEXT: imulq %rsi, %rcx
+; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: retq
;
; WIN64-LABEL: udiv_i128_255:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $255, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __udivti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: addq %rdx, %rax
+; WIN64-NEXT: adcq $0, %rax
+; WIN64-NEXT: movabsq $-9187201950435737471, %rdx # imm = 0x8080808080808081
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: shrq $7, %rdx
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: shlq $8, %rax
+; WIN64-NEXT: subq %rax, %rdx
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: addq %r8, %rax
+; WIN64-NEXT: adcq %rdx, %rax
+; WIN64-NEXT: subq %rax, %rcx
+; WIN64-NEXT: sbbq $0, %r8
+; WIN64-NEXT: movabsq $-72340172838076674, %r9 # imm = 0xFEFEFEFEFEFEFEFE
+; WIN64-NEXT: imulq %rcx, %r9
+; WIN64-NEXT: movabsq $-72340172838076673, %r10 # imm = 0xFEFEFEFEFEFEFEFF
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: addq %r9, %rdx
+; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
%rem = udiv i128 %x, 255
define i128 @udiv_i128_257(i128 %x) nounwind {
; X86-64-LABEL: udiv_i128_257:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $257, %edx # imm = 0x101
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __udivti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: movq %rdi, %rcx
+; X86-64-NEXT: addq %rsi, %rcx
+; X86-64-NEXT: adcq $0, %rcx
+; X86-64-NEXT: movabsq $-71777214294589695, %r8 # imm = 0xFF00FF00FF00FF01
+; X86-64-NEXT: movq %rcx, %rax
+; X86-64-NEXT: mulq %r8
+; X86-64-NEXT: movq %rdx, %rax
+; X86-64-NEXT: andq $-256, %rax
+; X86-64-NEXT: shrq $8, %rdx
+; X86-64-NEXT: addq %rax, %rdx
+; X86-64-NEXT: subq %rdx, %rcx
+; X86-64-NEXT: subq %rcx, %rdi
+; X86-64-NEXT: sbbq $0, %rsi
+; X86-64-NEXT: movabsq $-71777214294589696, %rcx # imm = 0xFF00FF00FF00FF00
+; X86-64-NEXT: imulq %rdi, %rcx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %r8
+; X86-64-NEXT: addq %rcx, %rdx
+; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
; WIN64-LABEL: udiv_i128_257:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $257, {{[0-9]+}}(%rsp) # imm = 0x101
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __udivti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: movq %rcx, %r9
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-71777214294589695, %r10 # imm = 0xFF00FF00FF00FF01
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: andq $-256, %rax
+; WIN64-NEXT: shrq $8, %rdx
+; WIN64-NEXT: addq %rax, %rdx
+; WIN64-NEXT: subq %rdx, %rcx
+; WIN64-NEXT: subq %rcx, %r9
+; WIN64-NEXT: sbbq $0, %r8
+; WIN64-NEXT: movabsq $-71777214294589696, %rcx # imm = 0xFF00FF00FF00FF00
+; WIN64-NEXT: imulq %r9, %rcx
+; WIN64-NEXT: movq %r9, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: addq %rcx, %rdx
+; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
%rem = udiv i128 %x, 257
define i128 @udiv_i128_65535(i128 %x) nounwind {
; X86-64-LABEL: udiv_i128_65535:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $65535, %edx # imm = 0xFFFF
-; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __udivti3@PLT
-; X86-64-NEXT: popq %rcx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: addq %rsi, %rax
+; X86-64-NEXT: adcq $0, %rax
+; X86-64-NEXT: movabsq $-9223231297218904063, %rcx # imm = 0x8000800080008001
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: shrq $15, %rdx
+; X86-64-NEXT: movq %rdx, %rax
+; X86-64-NEXT: shlq $16, %rax
+; X86-64-NEXT: subq %rax, %rdx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: addq %rsi, %rax
+; X86-64-NEXT: adcq %rdx, %rax
+; X86-64-NEXT: subq %rax, %rdi
+; X86-64-NEXT: sbbq $0, %rsi
+; X86-64-NEXT: movabsq $-281479271743490, %r8 # imm = 0xFFFEFFFEFFFEFFFE
+; X86-64-NEXT: imulq %rdi, %r8
+; X86-64-NEXT: movabsq $-281479271743489, %rcx # imm = 0xFFFEFFFEFFFEFFFF
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %rcx
+; X86-64-NEXT: addq %r8, %rdx
+; X86-64-NEXT: imulq %rsi, %rcx
+; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: retq
;
; WIN64-LABEL: udiv_i128_65535:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $65535, {{[0-9]+}}(%rsp) # imm = 0xFFFF
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __udivti3
-; WIN64-NEXT: movq %xmm0, %rax
-; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; WIN64-NEXT: movq %xmm0, %rdx
-; WIN64-NEXT: addq $72, %rsp
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: addq %rdx, %rax
+; WIN64-NEXT: adcq $0, %rax
+; WIN64-NEXT: movabsq $-9223231297218904063, %rdx # imm = 0x8000800080008001
+; WIN64-NEXT: mulq %rdx
+; WIN64-NEXT: shrq $15, %rdx
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: shlq $16, %rax
+; WIN64-NEXT: subq %rax, %rdx
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: addq %r8, %rax
+; WIN64-NEXT: adcq %rdx, %rax
+; WIN64-NEXT: subq %rax, %rcx
+; WIN64-NEXT: sbbq $0, %r8
+; WIN64-NEXT: movabsq $-281479271743490, %r9 # imm = 0xFFFEFFFEFFFEFFFE
+; WIN64-NEXT: imulq %rcx, %r9
+; WIN64-NEXT: movabsq $-281479271743489, %r10 # imm = 0xFFFEFFFEFFFEFFFF
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: addq %r9, %rdx
+; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
%rem = udiv i128 %x, 65535
define i128 @udiv_i128_65537(i128 %x) nounwind {
; X86-64-LABEL: udiv_i128_65537:
; X86-64: # %bb.0: # %entry
+; X86-64-NEXT: movq %rdi, %rcx
+; X86-64-NEXT: addq %rsi, %rcx
+; X86-64-NEXT: adcq $0, %rcx
+; X86-64-NEXT: movabsq $-281470681808895, %r8 # imm = 0xFFFF0000FFFF0001
+; X86-64-NEXT: movq %rcx, %rax
+; X86-64-NEXT: mulq %r8
+; X86-64-NEXT: movq %rdx, %rax
+; X86-64-NEXT: andq $-65536, %rax # imm = 0xFFFF0000
+; X86-64-NEXT: shrq $16, %rdx
+; X86-64-NEXT: addq %rax, %rdx
+; X86-64-NEXT: subq %rdx, %rcx
+; X86-64-NEXT: subq %rcx, %rdi
+; X86-64-NEXT: sbbq $0, %rsi
+; X86-64-NEXT: movabsq $-281470681808896, %rcx # imm = 0xFFFF0000FFFF0000
+; X86-64-NEXT: imulq %rdi, %rcx
+; X86-64-NEXT: movq %rdi, %rax
+; X86-64-NEXT: mulq %r8
+; X86-64-NEXT: addq %rcx, %rdx
+; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %r8, %rdx
+; X86-64-NEXT: retq
+;
+; WIN64-LABEL: udiv_i128_65537:
+; WIN64: # %bb.0: # %entry
+; WIN64-NEXT: movq %rdx, %r8
+; WIN64-NEXT: movq %rcx, %r9
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: adcq $0, %rcx
+; WIN64-NEXT: movabsq $-281470681808895, %r10 # imm = 0xFFFF0000FFFF0001
+; WIN64-NEXT: movq %rcx, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: movq %rdx, %rax
+; WIN64-NEXT: andq $-65536, %rax # imm = 0xFFFF0000
+; WIN64-NEXT: shrq $16, %rdx
+; WIN64-NEXT: addq %rax, %rdx
+; WIN64-NEXT: subq %rdx, %rcx
+; WIN64-NEXT: subq %rcx, %r9
+; WIN64-NEXT: sbbq $0, %r8
+; WIN64-NEXT: movabsq $-281470681808896, %rcx # imm = 0xFFFF0000FFFF0000
+; WIN64-NEXT: imulq %r9, %rcx
+; WIN64-NEXT: movq %r9, %rax
+; WIN64-NEXT: mulq %r10
+; WIN64-NEXT: addq %rcx, %rdx
+; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r8, %rdx
+; WIN64-NEXT: retq
+entry:
+ %rem = udiv i128 %x, 65537
+ ret i128 %rem
+}
+
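+; 12 is even, so it has no multiplicative inverse mod 2^128, and
+; 2^64 % 12 == 4 rather than 1; the expansion does not apply and the
+; __udivti3 libcall must stay.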
+define i128 @udiv_i128_12(i128 %x) nounwind {
+; X86-64-LABEL: udiv_i128_12:
+; X86-64: # %bb.0: # %entry
; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $65537, %edx # imm = 0x10001
+; X86-64-NEXT: movl $12, %edx
; X86-64-NEXT: xorl %ecx, %ecx
; X86-64-NEXT: callq __udivti3@PLT
; X86-64-NEXT: popq %rcx
; X86-64-NEXT: retq
;
-; WIN64-LABEL: udiv_i128_65537:
+; WIN64-LABEL: udiv_i128_12:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: subq $72, %rsp
; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $65537, {{[0-9]+}}(%rsp) # imm = 0x10001
+; WIN64-NEXT: movq $12, {{[0-9]+}}(%rsp)
; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT: addq $72, %rsp
; WIN64-NEXT: retq
entry:
- %rem = udiv i128 %x, 65537
+ %rem = udiv i128 %x, 12
ret i128 %rem
}
-define i128 @udiv_i128_12(i128 %x) nounwind {
-; X86-64-LABEL: udiv_i128_12:
+; Make sure we don't inline expand the urem for minsize.
+define i128 @urem_i128_3_minsize(i128 %x) nounwind minsize {
+; X86-64-LABEL: urem_i128_3_minsize:
; X86-64: # %bb.0: # %entry
; X86-64-NEXT: pushq %rax
-; X86-64-NEXT: movl $12, %edx
+; X86-64-NEXT: pushq $3
+; X86-64-NEXT: popq %rdx
; X86-64-NEXT: xorl %ecx, %ecx
-; X86-64-NEXT: callq __udivti3@PLT
+; X86-64-NEXT: callq __umodti3@PLT
; X86-64-NEXT: popq %rcx
; X86-64-NEXT: retq
;
-; WIN64-LABEL: udiv_i128_12:
+; WIN64-LABEL: urem_i128_3_minsize:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: subq $72, %rsp
-; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $12, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rax
+; WIN64-NEXT: movq %rdx, 8(%rax)
+; WIN64-NEXT: movq %rcx, (%rax)
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; WIN64-NEXT: callq __udivti3
+; WIN64-NEXT: movq $3, (%rdx)
+; WIN64-NEXT: andq $0, 8(%rdx)
+; WIN64-NEXT: movq %rax, %rcx
+; WIN64-NEXT: callq __umodti3
; WIN64-NEXT: movq %xmm0, %rax
; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; WIN64-NEXT: movq %xmm0, %rdx
; WIN64-NEXT: addq $72, %rsp
; WIN64-NEXT: retq
entry:
- %rem = udiv i128 %x, 12
+ %rem = urem i128 %x, 3
ret i128 %rem
}
+; Make sure we don't inline expand the urem for optsize.
define i128 @urem_i128_3_optsize(i128 %x) nounwind optsize {
; X86-64-LABEL: urem_i128_3_optsize:
; X86-64: # %bb.0: # %entry