From bc7f11ccb01cbc2ae6c1631535ea5c181f70cb1e Mon Sep 17 00:00:00 2001 From: Eli Friedman <efriedma@quicinc.com> Date: Wed, 24 May 2023 10:39:43 -0700 Subject: [PATCH] [SelectionDAG] Improve expansion of wide min/max The current implementation tries to handle the high and low halves separately, but that's less efficient in most cases; use a wide SETCC instead. Differential Revision: https://reviews.llvm.org/D151358 --- .../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 122 +- llvm/test/CodeGen/AArch64/dag-combine-setcc.ll | 4 +- llvm/test/CodeGen/AArch64/fpclamptosat.ll | 39 +- llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 42 +- .../CodeGen/AArch64/vecreduce-umax-legalization.ll | 10 +- llvm/test/CodeGen/AMDGPU/max.ll | 32 +- llvm/test/CodeGen/AMDGPU/min.ll | 32 +- llvm/test/CodeGen/ARM/fpclamptosat.ll | 1968 +++++++------------- llvm/test/CodeGen/ARM/fpclamptosat_vec.ll | 1812 ++++++++---------- llvm/test/CodeGen/RISCV/fpclamptosat.ll | 858 ++++----- llvm/test/CodeGen/RISCV/min-max.ll | 152 +- llvm/test/CodeGen/RISCV/rv32zbb.ll | 188 +- llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 788 +++----- llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll | 618 ++---- llvm/test/CodeGen/Thumb2/mve-minmaxi.ll | 104 +- llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll | 64 +- llvm/test/CodeGen/VE/Scalar/smax.ll | 46 +- llvm/test/CodeGen/VE/Scalar/smin.ll | 52 +- llvm/test/CodeGen/VE/Scalar/umax.ll | 19 +- llvm/test/CodeGen/VE/Scalar/umin.ll | 19 +- llvm/test/CodeGen/WebAssembly/fpclamptosat.ll | 45 - llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll | 168 +- llvm/test/CodeGen/X86/abds.ll | 28 +- llvm/test/CodeGen/X86/abdu.ll | 26 +- llvm/test/CodeGen/X86/fpclamptosat.ll | 18 +- llvm/test/CodeGen/X86/fpclamptosat_vec.ll | 207 +- llvm/test/CodeGen/X86/sdiv_fix_sat.ll | 917 ++++----- llvm/test/CodeGen/X86/smax.ll | 103 +- llvm/test/CodeGen/X86/smin.ll | 77 +- llvm/test/CodeGen/X86/udiv_fix_sat.ll | 5 - llvm/test/CodeGen/X86/umax.ll | 141 +- llvm/test/CodeGen/X86/umin.ll | 75 +- 32 files changed, 3368 insertions(+), 5411 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 0c4c5fe..7c99a9f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2943,64 +2943,118 @@ static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) { void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc DL(N); - ISD::NodeType LoOpc; - ISD::CondCode CondC; - std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode()); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - // Expand the subcomponents. - SDValue LHSL, LHSH, RHSL, RHSH; - GetExpandedInteger(LHS, LHSL, LHSH); - GetExpandedInteger(RHS, RHSL, RHSH); - - // Value types - EVT NVT = LHSL.getValueType(); - EVT CCT = getSetCCResultType(NVT); - // If the upper halves are all sign bits, then we can perform the MINMAX on // the lower half and sign-extend the result to the upper half.
- unsigned NumHalfBits = NVT.getScalarSizeInBits(); + unsigned NumBits = N->getValueType(0).getScalarSizeInBits(); + unsigned NumHalfBits = NumBits / 2; if (DAG.ComputeNumSignBits(LHS) > NumHalfBits && DAG.ComputeNumSignBits(RHS) > NumHalfBits) { + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(LHS, LHSL, LHSH); + GetExpandedInteger(RHS, RHSL, RHSH); + EVT NVT = LHSL.getValueType(); + Lo = DAG.getNode(N->getOpcode(), DL, NVT, LHSL, RHSL); Hi = DAG.getNode(ISD::SRA, DL, NVT, Lo, DAG.getShiftAmountConstant(NumHalfBits - 1, NVT, DL)); return; } - // Hi part is always the same op - Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH}); - // The Lo of smin(X, -1) is LHSL if X is negative. Otherwise it's -1. - if (N->getOpcode() == ISD::SMIN && isAllOnesConstant(RHS)) { - SDValue HiNeg = - DAG.getSetCC(DL, CCT, LHSH, DAG.getConstant(0, DL, NVT), ISD::SETLT); - Lo = DAG.getSelect(DL, NVT, HiNeg, LHSL, DAG.getConstant(-1, DL, NVT)); - return; - } - // The Lo of smax(X, 0) is 0 if X is negative. Otherwise it's LHSL. - if (N->getOpcode() == ISD::SMAX && isNullConstant(RHS)) { + if ((N->getOpcode() == ISD::SMAX && isNullConstant(RHS)) || + (N->getOpcode() == ISD::SMIN && isAllOnesConstant(RHS))) { + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(LHS, LHSL, LHSH); + GetExpandedInteger(RHS, RHSL, RHSH); + EVT NVT = LHSL.getValueType(); + EVT CCT = getSetCCResultType(NVT); + SDValue HiNeg = DAG.getSetCC(DL, CCT, LHSH, DAG.getConstant(0, DL, NVT), ISD::SETLT); - Lo = DAG.getSelect(DL, NVT, HiNeg, DAG.getConstant(0, DL, NVT), LHSL); + if (N->getOpcode() == ISD::SMIN) { + Lo = DAG.getSelect(DL, NVT, HiNeg, LHSL, DAG.getConstant(-1, DL, NVT)); + } else { + Lo = DAG.getSelect(DL, NVT, HiNeg, DAG.getConstant(0, DL, NVT), LHSL); + } + Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH}); return; } - // We need to know whether to select Lo part that corresponds to 'winning' - // Hi part or if Hi parts are equal. - SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC); - SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ); + const APInt *RHSVal = nullptr; + if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS)) + RHSVal = &RHSConst->getAPIntValue(); + + // The high half of MIN/MAX is always just the MIN/MAX of the + // high halves of the operands. Expand this way if it appears profitable. + if (RHSVal && (N->getOpcode() == ISD::UMIN || N->getOpcode() == ISD::UMAX) && + (RHSVal->countLeadingOnes() >= NumHalfBits || + RHSVal->countLeadingZeros() >= NumHalfBits)) { + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(LHS, LHSL, LHSH); + GetExpandedInteger(RHS, RHSL, RHSH); + EVT NVT = LHSL.getValueType(); + EVT CCT = getSetCCResultType(NVT); + + ISD::NodeType LoOpc; + ISD::CondCode CondC; + std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode()); - // Lo part corresponding to the 'winning' Hi part - SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL); + Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH}); + // We need to know whether to select Lo part that corresponds to 'winning' + // Hi part or if Hi parts are equal.
+ SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC); + SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ); - // Recursed Lo part if Hi parts are equal, this uses unsigned version - SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL}); + // Lo part corresponding to the 'winning' Hi part + SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL); + + // Recursed Lo part if Hi parts are equal, this uses unsigned version + SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL}); + + Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp); + return; + } - Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp); + // Expand to "a < b ? a : b" etc. Prefer ge/le if that simplifies + // the compare. + ISD::CondCode Pred; + switch (N->getOpcode()) { + default: llvm_unreachable("How did we get here?"); + case ISD::SMAX: + if (RHSVal && RHSVal->countTrailingZeros() >= NumHalfBits) + Pred = ISD::SETGE; + else + Pred = ISD::SETGT; + break; + case ISD::SMIN: + if (RHSVal && RHSVal->countTrailingOnes() >= NumHalfBits) + Pred = ISD::SETLE; + else + Pred = ISD::SETLT; + break; + case ISD::UMAX: + if (RHSVal && RHSVal->countTrailingZeros() >= NumHalfBits) + Pred = ISD::SETUGE; + else + Pred = ISD::SETUGT; + break; + case ISD::UMIN: + if (RHSVal && RHSVal->countTrailingOnes() >= NumHalfBits) + Pred = ISD::SETULE; + else + Pred = ISD::SETULT; + break; + } + EVT VT = N->getValueType(0); + EVT CCT = getSetCCResultType(VT); + SDValue Cond = DAG.getSetCC(DL, CCT, LHS, RHS, Pred); + SDValue Result = DAG.getSelect(DL, VT, Cond, LHS, RHS); + SplitInteger(Result, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll index f6c4c84..cba2e7a 100644 --- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll @@ -340,11 +340,9 @@ define [2 x i64] @PR58675(i128 %a.addr, i128 %b.addr) { ; CHECK-NEXT: .LBB20_1: // %do.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmp x0, x8 +; CHECK-NEXT: sbcs xzr, x1, x9 ; CHECK-NEXT: csel x10, x0, x8, lo -; CHECK-NEXT: cmp x1, x9 -; CHECK-NEXT: csel x8, x0, x8, lo ; CHECK-NEXT: csel x11, x1, x9, lo -; CHECK-NEXT: csel x10, x10, x8, eq ; CHECK-NEXT: subs x8, x2, x10 ; CHECK-NEXT: sbc x9, x3, x11 ; CHECK-NEXT: cmp x3, x11 diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll index 9f10bce..06dc11d 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -882,9 +882,7 @@ define i64 @utest_f64i64_mm(double %x) { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: csel x8, x0, xzr, eq -; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x0, xzr, x8, eq +; CHECK-NEXT: csel x0, x0, xzr, eq ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -902,11 +900,10 @@ define i64 @ustest_f64i64_mm(double %x) { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, x0, xzr, lt -; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: csel x8, xzr, x8, eq -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: csel x0, xzr, x8, lt +; CHECK-NEXT: csinc x8, x1, xzr, lt +; CHECK-NEXT: csel x9, x0, xzr, lt +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: csel x0, xzr, x9, lt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -938,9 +935,7 @@ define i64 
@utest_f32i64_mm(float %x) { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: csel x8, x0, xzr, eq -; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x0, xzr, x8, eq +; CHECK-NEXT: csel x0, x0, xzr, eq ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -958,11 +953,10 @@ define i64 @ustest_f32i64_mm(float %x) { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, x0, xzr, lt -; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: csel x8, xzr, x8, eq -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: csel x0, xzr, x8, lt +; CHECK-NEXT: csinc x8, x1, xzr, lt +; CHECK-NEXT: csel x9, x0, xzr, lt +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: csel x0, xzr, x9, lt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -1000,9 +994,7 @@ define i64 @utesth_f16i64_mm(half %x) { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixunshfti ; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: csel x8, x0, xzr, eq -; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x0, xzr, x8, eq +; CHECK-NEXT: csel x0, x0, xzr, eq ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -1020,11 +1012,10 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, x0, xzr, lt -; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: csel x8, xzr, x8, eq -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: csel x0, xzr, x8, lt +; CHECK-NEXT: csinc x8, x1, xzr, lt +; CHECK-NEXT: csel x9, x0, xzr, lt +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: csel x0, xzr, x9, lt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index 38ec6a0..99fcf5e 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -995,12 +995,8 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: csel x8, x0, xzr, eq -; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, xzr, x8, eq ; CHECK-NEXT: cmp x20, #0 ; CHECK-NEXT: csel x9, x19, xzr, eq -; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x9, xzr, x9, eq ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 @@ -1036,13 +1032,11 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: csel x8, xzr, x8, eq ; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x10, x19, xzr, lt -; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: csel x10, xzr, x10, eq -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: csel x10, xzr, x10, lt +; CHECK-NEXT: csinc x10, x20, xzr, lt +; CHECK-NEXT: csel x11, x19, xzr, lt +; CHECK-NEXT: cmp x10, #0 +; CHECK-NEXT: csel x10, xzr, x11, lt ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: csel x8, xzr, x8, lt ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload @@ -1099,12 +1093,8 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: csel x8, x0, xzr, eq -; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, xzr, x8, eq ; CHECK-NEXT: cmp x20, #0 ; CHECK-NEXT: csel x9, x19, xzr, eq -; 
CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x9, xzr, x9, eq ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 @@ -1141,13 +1131,11 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: csel x8, xzr, x8, eq ; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x10, x19, xzr, lt -; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: csel x10, xzr, x10, eq -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: csel x10, xzr, x10, lt +; CHECK-NEXT: csinc x10, x20, xzr, lt +; CHECK-NEXT: csel x11, x19, xzr, lt +; CHECK-NEXT: cmp x10, #0 +; CHECK-NEXT: csel x10, xzr, x11, lt ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: csel x8, xzr, x8, lt ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload @@ -1216,12 +1204,8 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: csel x8, x0, xzr, eq -; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, xzr, x8, eq ; CHECK-NEXT: cmp x20, #0 ; CHECK-NEXT: csel x9, x19, xzr, eq -; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x9, xzr, x9, eq ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 @@ -1258,13 +1242,11 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: csel x8, xzr, x8, eq ; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x10, x19, xzr, lt -; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: csel x10, xzr, x10, eq -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: csel x10, xzr, x10, lt +; CHECK-NEXT: csinc x10, x20, xzr, lt +; CHECK-NEXT: csel x11, x19, xzr, lt +; CHECK-NEXT: cmp x10, #0 +; CHECK-NEXT: csel x10, xzr, x11, lt ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: csel x8, xzr, x8, lt ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll index 555ebe6..a688d91 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll @@ -162,12 +162,10 @@ define i24 @test_v4i24(<4 x i24> %a) nounwind { define i128 @test_v2i128(<2 x i128> %a) nounwind { ; CHECK-LABEL: test_v2i128: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: csel x8, x0, x2, hi -; CHECK-NEXT: cmp x1, x3 -; CHECK-NEXT: csel x9, x0, x2, hi -; CHECK-NEXT: csel x1, x1, x3, hi -; CHECK-NEXT: csel x0, x8, x9, eq +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: sbcs xzr, x3, x1 +; CHECK-NEXT: csel x0, x0, x2, lo +; CHECK-NEXT: csel x1, x1, x3, lo ; CHECK-NEXT: ret %b = call i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a) ret i128 %b diff --git a/llvm/test/CodeGen/AMDGPU/max.ll b/llvm/test/CodeGen/AMDGPU/max.ll index ce1b5bb..52da8fe 100644 --- a/llvm/test/CodeGen/AMDGPU/max.ll +++ b/llvm/test/CodeGen/AMDGPU/max.ll @@ -288,8 +288,12 @@ define amdgpu_kernel void @s_test_imax_sge_i16(ptr addrspace(1) %out, [8 x i32], ; FUNC-LABEL: {{^}}test_umax_ugt_i64 ; SI: s_endpgm -; EG: MAX_UINT -; EG: MAX_UINT +; EG: SETE_INT +; EG: SETGT_UINT +; EG: SETGT_UINT +; EG: CNDE_INT +; EG: CNDE_INT +; EG: CNDE_INT define amdgpu_kernel void @test_umax_ugt_i64(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind { %tmp = icmp ugt i64 %a, %b %val 
= select i1 %tmp, i64 %a, i64 %b @@ -300,8 +304,12 @@ define amdgpu_kernel void @test_umax_ugt_i64(ptr addrspace(1) %out, i64 %a, i64 ; FUNC-LABEL: {{^}}test_umax_uge_i64 ; SI: s_endpgm -; EG: MAX_UINT -; EG: MAX_UINT +; EG: SETE_INT +; EG: SETGT_UINT +; EG: SETGT_UINT +; EG: CNDE_INT +; EG: CNDE_INT +; EG: CNDE_INT define amdgpu_kernel void @test_umax_uge_i64(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind { %tmp = icmp uge i64 %a, %b %val = select i1 %tmp, i64 %a, i64 %b @@ -312,8 +320,12 @@ define amdgpu_kernel void @test_umax_uge_i64(ptr addrspace(1) %out, i64 %a, i64 ; FUNC-LABEL: {{^}}test_imax_sgt_i64 ; SI: s_endpgm -; EG-DAG: MAX_UINT -; EG-DAG: MAX_INT +; EG: SETE_INT +; EG: SETGT_INT +; EG: SETGT_UINT +; EG: CNDE_INT +; EG: CNDE_INT +; EG: CNDE_INT define amdgpu_kernel void @test_imax_sgt_i64(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind { %tmp = icmp sgt i64 %a, %b %val = select i1 %tmp, i64 %a, i64 %b @@ -324,8 +336,12 @@ define amdgpu_kernel void @test_imax_sgt_i64(ptr addrspace(1) %out, i64 %a, i64 ; FUNC-LABEL: {{^}}test_imax_sge_i64 ; SI: s_endpgm -; EG-DAG: MAX_UINT -; EG-DAG: MAX_INT +; EG: SETE_INT +; EG: SETGT_INT +; EG: SETGT_UINT +; EG: CNDE_INT +; EG: CNDE_INT +; EG: CNDE_INT define amdgpu_kernel void @test_imax_sge_i64(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind { %tmp = icmp sge i64 %a, %b %val = select i1 %tmp, i64 %a, i64 %b diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll index c65db9e..efe8e41 100644 --- a/llvm/test/CodeGen/AMDGPU/min.ll +++ b/llvm/test/CodeGen/AMDGPU/min.ll @@ -549,8 +549,12 @@ define amdgpu_kernel void @s_test_imin_sle_i16(ptr addrspace(1) %out, i16 %a, i1 ; FUNC-LABEL: {{^}}test_umin_ult_i64 ; GCN: s_endpgm -; EG: MIN_UINT -; EG: MIN_UINT +; EG: SETE_INT +; EG: SETGT_UINT +; EG: SETGT_UINT +; EG: CNDE_INT +; EG: CNDE_INT +; EG: CNDE_INT define amdgpu_kernel void @test_umin_ult_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %tmp = icmp ult i64 %a, %b %val = select i1 %tmp, i64 %a, i64 %b @@ -561,8 +565,12 @@ define amdgpu_kernel void @test_umin_ult_i64(ptr addrspace(1) %out, i64 %a, i64 ; FUNC-LABEL: {{^}}test_umin_ule_i64 ; GCN: s_endpgm -; EG: MIN_UINT -; EG: MIN_UINT +; EG: SETE_INT +; EG: SETGT_UINT +; EG: SETGT_UINT +; EG: CNDE_INT +; EG: CNDE_INT +; EG: CNDE_INT define amdgpu_kernel void @test_umin_ule_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %tmp = icmp ule i64 %a, %b %val = select i1 %tmp, i64 %a, i64 %b @@ -573,8 +581,12 @@ define amdgpu_kernel void @test_umin_ule_i64(ptr addrspace(1) %out, i64 %a, i64 ; FUNC-LABEL: {{^}}test_imin_slt_i64 ; GCN: s_endpgm -; EG-DAG: MIN_UINT -; EG-DAG: MIN_INT +; EG: SETE_INT +; EG: SETGT_INT +; EG: SETGT_UINT +; EG: CNDE_INT +; EG: CNDE_INT +; EG: CNDE_INT define amdgpu_kernel void @test_imin_slt_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %tmp = icmp slt i64 %a, %b %val = select i1 %tmp, i64 %a, i64 %b @@ -585,8 +597,12 @@ define amdgpu_kernel void @test_imin_slt_i64(ptr addrspace(1) %out, i64 %a, i64 ; FUNC-LABEL: {{^}}test_imin_sle_i64 ; GCN: s_endpgm -; EG-DAG: MIN_UINT -; EG-DAG: MIN_INT +; EG: SETE_INT +; EG: SETGT_INT +; EG: SETGT_UINT +; EG: CNDE_INT +; EG: CNDE_INT +; EG: CNDE_INT define amdgpu_kernel void @test_imin_sle_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %tmp = icmp sle i64 %a, %b %val = select i1 %tmp, i64 %a, i64 %b diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 66fe5d9..6c3c74a 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ 
-2112,64 +2112,46 @@ entry: define i32 @stest_f64i32_mm(double %x) { ; SOFT-LABEL: stest_f64i32_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r7, lr} -; SOFT-NEXT: push {r7, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; SOFT-NEXT: bl __aeabi_d2lz -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: ldr r3, .LCPI27_0 -; SOFT-NEXT: cmp r0, r3 -; SOFT-NEXT: bhs .LBB27_9 +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: movs r3, #0 +; SOFT-NEXT: ldr r4, .LCPI27_0 +; SOFT-NEXT: subs r5, r0, r4 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: sbcs r5, r3 +; SOFT-NEXT: mov r5, r2 +; SOFT-NEXT: bge .LBB27_7 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bpl .LBB27_10 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: beq .LBB27_8 ; SOFT-NEXT: .LBB27_2: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB27_11 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: bne .LBB27_4 ; SOFT-NEXT: .LBB27_3: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bmi .LBB27_5 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB27_4: @ %entry -; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: .LBB27_5: @ %entry -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: lsls r3, r2, #31 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: blt .LBB27_12 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: cmp r0, r3 -; SOFT-NEXT: bls .LBB27_13 +; SOFT-NEXT: mvns r3, r3 +; SOFT-NEXT: lsls r2, r2, #31 +; SOFT-NEXT: subs r4, r2, r0 +; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: blt .LBB27_6 +; SOFT-NEXT: @ %bb.5: @ %entry +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: .LBB27_6: @ %entry +; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB27_7: @ %entry -; SOFT-NEXT: adds r1, r1, #1 -; SOFT-NEXT: bne .LBB27_14 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: bne .LBB27_2 ; SOFT-NEXT: .LBB27_8: @ %entry -; SOFT-NEXT: pop {r7, pc} -; SOFT-NEXT: .LBB27_9: @ %entry -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bmi .LBB27_2 -; SOFT-NEXT: .LBB27_10: @ %entry -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: beq .LBB27_3 -; SOFT-NEXT: .LBB27_11: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bpl .LBB27_4 -; SOFT-NEXT: b .LBB27_5 -; SOFT-NEXT: .LBB27_12: @ %entry -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: cmp r0, r3 -; SOFT-NEXT: bhi .LBB27_7 -; SOFT-NEXT: .LBB27_13: @ %entry -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: adds r1, r1, #1 -; SOFT-NEXT: beq .LBB27_8 -; SOFT-NEXT: .LBB27_14: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r7, pc} +; SOFT-NEXT: b .LBB27_4 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.15: +; SOFT-NEXT: @ %bb.9: ; SOFT-NEXT: .LCPI27_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -2179,29 +2161,21 @@ define i32 @stest_f64i32_mm(double %x) { ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2lz -; VFP2-NEXT: mvn r2, #-2147483648 -; VFP2-NEXT: cmp r0, r2 -; VFP2-NEXT: mvn r3, #-2147483648 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r3, r0 -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r0, r2 -; VFP2-NEXT: mov.w r2, #-2147483648 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r3 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r1, #0 -; VFP2-NEXT: cmp.w r1, #-1 -; VFP2-NEXT: mov.w r3, #-2147483648 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r3, r0 -; VFP2-NEXT: cmp.w r0, #-2147483648 -; VFP2-NEXT: it ls -; VFP2-NEXT: movls r0, r2 -; VFP2-NEXT: adds r1, #1 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r0, r3 +; VFP2-NEXT: mvn r12, #-2147483648 +; VFP2-NEXT: subs.w r3, r0, r12 +; VFP2-NEXT: mov.w r2, #0 +; VFP2-NEXT: 
sbcs r3, r1, #0 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt r2, #1 +; VFP2-NEXT: cmp r2, #0 +; VFP2-NEXT: ite ne +; VFP2-NEXT: movne r2, r1 +; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: mov.w r1, #-1 +; VFP2-NEXT: rsbs.w r3, r0, #-2147483648 +; VFP2-NEXT: sbcs r1, r2 +; VFP2-NEXT: it ge +; VFP2-NEXT: movge.w r0, #-2147483648 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: stest_f64i32_mm: @@ -2260,35 +2234,23 @@ define i32 @ustest_f64i32_mm(double %x) { ; SOFT-NEXT: .save {r7, lr} ; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: bl __aeabi_d2lz -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: movs r0, #0 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r3, r2 -; SOFT-NEXT: bpl .LBB29_5 +; SOFT-NEXT: asrs r3, r1, #31 +; SOFT-NEXT: ands r3, r1 +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: cmp r1, #1 +; SOFT-NEXT: bge .LBB29_3 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB29_6 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bpl .LBB29_4 ; SOFT-NEXT: .LBB29_2: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bpl .LBB29_7 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: pop {r7, pc} ; SOFT-NEXT: .LBB29_3: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bpl .LBB29_8 +; SOFT-NEXT: mvns r0, r2 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bmi .LBB29_2 ; SOFT-NEXT: .LBB29_4: @ %entry -; SOFT-NEXT: pop {r7, pc} -; SOFT-NEXT: .LBB29_5: @ %entry -; SOFT-NEXT: mvns r3, r0 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: beq .LBB29_2 -; SOFT-NEXT: .LBB29_6: @ %entry -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bmi .LBB29_3 -; SOFT-NEXT: .LBB29_7: @ %entry -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bmi .LBB29_4 -; SOFT-NEXT: .LBB29_8: @ %entry +; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r7, pc} ; @@ -2298,15 +2260,10 @@ define i32 @ustest_f64i32_mm(double %x) { ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2lz -; VFP2-NEXT: mov r2, r0 -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl.w r2, #-1 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r0, r2 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r1, #0 -; VFP2-NEXT: cmp r1, #0 +; VFP2-NEXT: cmp r1, #1 +; VFP2-NEXT: it ge +; VFP2-NEXT: movge.w r0, #-1 +; VFP2-NEXT: ands.w r1, r1, r1, asr #31 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r0, #0 ; VFP2-NEXT: pop {r7, pc} @@ -2327,64 +2284,46 @@ entry: define i32 @stest_f32i32_mm(float %x) { ; SOFT-LABEL: stest_f32i32_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r7, lr} -; SOFT-NEXT: push {r7, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: ldr r3, .LCPI30_0 -; SOFT-NEXT: cmp r0, r3 -; SOFT-NEXT: bhs .LBB30_9 +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: movs r3, #0 +; SOFT-NEXT: ldr r4, .LCPI30_0 +; SOFT-NEXT: subs r5, r0, r4 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: sbcs r5, r3 +; SOFT-NEXT: mov r5, r2 +; SOFT-NEXT: bge .LBB30_7 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bpl .LBB30_10 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: beq .LBB30_8 ; SOFT-NEXT: .LBB30_2: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB30_11 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: bne .LBB30_4 ; SOFT-NEXT: .LBB30_3: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bmi .LBB30_5 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB30_4: @ %entry -; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: .LBB30_5: @ %entry -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: lsls r3, r2, #31 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: blt .LBB30_12 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: cmp 
r0, r3 -; SOFT-NEXT: bls .LBB30_13 +; SOFT-NEXT: mvns r3, r3 +; SOFT-NEXT: lsls r2, r2, #31 +; SOFT-NEXT: subs r4, r2, r0 +; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: blt .LBB30_6 +; SOFT-NEXT: @ %bb.5: @ %entry +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: .LBB30_6: @ %entry +; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB30_7: @ %entry -; SOFT-NEXT: adds r1, r1, #1 -; SOFT-NEXT: bne .LBB30_14 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: bne .LBB30_2 ; SOFT-NEXT: .LBB30_8: @ %entry -; SOFT-NEXT: pop {r7, pc} -; SOFT-NEXT: .LBB30_9: @ %entry -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bmi .LBB30_2 -; SOFT-NEXT: .LBB30_10: @ %entry -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: beq .LBB30_3 -; SOFT-NEXT: .LBB30_11: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bpl .LBB30_4 -; SOFT-NEXT: b .LBB30_5 -; SOFT-NEXT: .LBB30_12: @ %entry -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: cmp r0, r3 -; SOFT-NEXT: bhi .LBB30_7 -; SOFT-NEXT: .LBB30_13: @ %entry -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: adds r1, r1, #1 -; SOFT-NEXT: beq .LBB30_8 -; SOFT-NEXT: .LBB30_14: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r7, pc} +; SOFT-NEXT: b .LBB30_4 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.15: +; SOFT-NEXT: @ %bb.9: ; SOFT-NEXT: .LCPI30_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -2430,39 +2369,23 @@ entry: define i32 @ustest_f32i32_mm(float %x) { ; SOFT-LABEL: ustest_f32i32_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: bl __aeabi_f2lz ; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: movs r0, #0 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r3, r1 -; SOFT-NEXT: bmi .LBB32_2 +; SOFT-NEXT: cmp r1, #1 +; SOFT-NEXT: blt .LBB32_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: mvns r2, r0 ; SOFT-NEXT: .LBB32_2: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: bpl .LBB32_6 +; SOFT-NEXT: asrs r3, r1, #31 +; SOFT-NEXT: ands r3, r1 +; SOFT-NEXT: bmi .LBB32_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB32_7 -; SOFT-NEXT: .LBB32_4: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB32_8 -; SOFT-NEXT: .LBB32_5: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB32_6: @ %entry -; SOFT-NEXT: mvns r4, r0 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: beq .LBB32_4 -; SOFT-NEXT: .LBB32_7: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB32_5 -; SOFT-NEXT: .LBB32_8: @ %entry ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: .LBB32_4: @ %entry +; SOFT-NEXT: pop {r7, pc} ; ; VFP-LABEL: ustest_f32i32_mm: ; VFP: @ %bb.0: @ %entry @@ -2480,66 +2403,48 @@ entry: define i32 @stest_f16i32_mm(half %x) { ; SOFT-LABEL: stest_f16i32_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r7, lr} -; SOFT-NEXT: push {r7, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: ldr r3, .LCPI33_0 -; SOFT-NEXT: cmp r0, r3 -; SOFT-NEXT: bhs .LBB33_9 +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: movs r3, #0 +; SOFT-NEXT: ldr r4, .LCPI33_0 +; SOFT-NEXT: subs r5, r0, r4 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: sbcs r5, r3 +; SOFT-NEXT: mov r5, r2 +; SOFT-NEXT: bge .LBB33_7 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bpl .LBB33_10 +; SOFT-NEXT: cmp 
r5, #0 +; SOFT-NEXT: beq .LBB33_8 ; SOFT-NEXT: .LBB33_2: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB33_11 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: bne .LBB33_4 ; SOFT-NEXT: .LBB33_3: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bmi .LBB33_5 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB33_4: @ %entry -; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: .LBB33_5: @ %entry -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: lsls r3, r2, #31 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: blt .LBB33_12 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: cmp r0, r3 -; SOFT-NEXT: bls .LBB33_13 +; SOFT-NEXT: mvns r3, r3 +; SOFT-NEXT: lsls r2, r2, #31 +; SOFT-NEXT: subs r4, r2, r0 +; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: blt .LBB33_6 +; SOFT-NEXT: @ %bb.5: @ %entry +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: .LBB33_6: @ %entry +; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB33_7: @ %entry -; SOFT-NEXT: adds r1, r1, #1 -; SOFT-NEXT: bne .LBB33_14 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: bne .LBB33_2 ; SOFT-NEXT: .LBB33_8: @ %entry -; SOFT-NEXT: pop {r7, pc} -; SOFT-NEXT: .LBB33_9: @ %entry -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bmi .LBB33_2 -; SOFT-NEXT: .LBB33_10: @ %entry -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: beq .LBB33_3 -; SOFT-NEXT: .LBB33_11: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bpl .LBB33_4 -; SOFT-NEXT: b .LBB33_5 -; SOFT-NEXT: .LBB33_12: @ %entry -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: cmp r0, r3 -; SOFT-NEXT: bhi .LBB33_7 -; SOFT-NEXT: .LBB33_13: @ %entry -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: adds r1, r1, #1 -; SOFT-NEXT: beq .LBB33_8 -; SOFT-NEXT: .LBB33_14: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r7, pc} +; SOFT-NEXT: b .LBB33_4 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.15: +; SOFT-NEXT: @ %bb.9: ; SOFT-NEXT: .LCPI33_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -2609,41 +2514,25 @@ entry: define i32 @ustest_f16i32_mm(half %x) { ; SOFT-LABEL: ustest_f16i32_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __aeabi_f2lz ; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: movs r0, #0 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r3, r1 -; SOFT-NEXT: bmi .LBB35_2 +; SOFT-NEXT: cmp r1, #1 +; SOFT-NEXT: blt .LBB35_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: mvns r2, r0 ; SOFT-NEXT: .LBB35_2: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: bpl .LBB35_6 +; SOFT-NEXT: asrs r3, r1, #31 +; SOFT-NEXT: ands r3, r1 +; SOFT-NEXT: bmi .LBB35_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB35_7 -; SOFT-NEXT: .LBB35_4: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB35_8 -; SOFT-NEXT: .LBB35_5: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB35_6: @ %entry -; SOFT-NEXT: mvns r4, r0 -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: beq .LBB35_4 -; SOFT-NEXT: .LBB35_7: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB35_5 -; SOFT-NEXT: .LBB35_8: @ %entry ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: .LBB35_4: @ %entry +; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: ustest_f16i32_mm: ; VFP2: @ %bb.0: @ %entry @@ -3084,249 +2973,150 @@ define i64 @stest_f64i64_mm(double %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, 
r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: ldr r5, .LCPI45_0 -; SOFT-NEXT: cmp r1, r5 -; SOFT-NEXT: blo .LBB45_2 +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: ldr r6, .LCPI45_0 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: sbcs r0, r6 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: bge .LBB45_12 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB45_13 ; SOFT-NEXT: .LBB45_2: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bmi .LBB45_4 -; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB45_14 +; SOFT-NEXT: .LBB45_3: @ %entry +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB45_5 ; SOFT-NEXT: .LBB45_4: @ %entry -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: orrs r7, r3 -; SOFT-NEXT: beq .LBB45_6 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: .LBB45_6: @ %entry -; SOFT-NEXT: movs r0, #0 -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: mvns r2, r0 -; SOFT-NEXT: cmp r4, r5 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: blo .LBB45_8 -; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: .LBB45_8: @ %entry -; SOFT-NEXT: cmp r4, r5 -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: bne .LBB45_26 -; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB45_27 -; SOFT-NEXT: .LBB45_10: @ %entry +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: .LBB45_5: @ %entry +; SOFT-NEXT: mvns r0, r5 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB45_12 -; SOFT-NEXT: .LBB45_11: @ %entry -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: .LBB45_12: @ %entry -; SOFT-NEXT: movs r0, #1 -; SOFT-NEXT: lsls r5, r0, #31 -; SOFT-NEXT: cmp r1, r5 -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: bhi .LBB45_14 -; SOFT-NEXT: @ %bb.13: @ %entry -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: .LBB45_14: @ %entry -; SOFT-NEXT: cmp r1, r5 -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bne .LBB45_7 +; SOFT-NEXT: @ %bb.6: @ %entry +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB45_7: @ %entry +; SOFT-NEXT: lsls r6, r4, #31 +; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload +; SOFT-NEXT: rsbs r7, r7, #0 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: sbcs r7, r1 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: sbcs r7, r2 +; SOFT-NEXT: sbcs r0, r3 +; SOFT-NEXT: bge .LBB45_15 +; SOFT-NEXT: @ %bb.8: @ %entry +; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB45_16 -; SOFT-NEXT: @ %bb.15: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: .LBB45_16: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: bpl .LBB45_28 -; SOFT-NEXT: @ %bb.17: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: blt .LBB45_29 -; SOFT-NEXT: .LBB45_18: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: beq .LBB45_20 -; SOFT-NEXT: .LBB45_19: -; SOFT-NEXT: asrs r3, r3, #31 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: ands r3, r2 -; SOFT-NEXT: .LBB45_20: @ %entry -; SOFT-NEXT: ands r3, r7 -; SOFT-NEXT: adds r2, r3, #1 -; SOFT-NEXT: beq .LBB45_22 -; SOFT-NEXT: @ %bb.21: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB45_22: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: mov r3, r1 -; SOFT-NEXT: blt .LBB45_30 -; 
SOFT-NEXT: @ %bb.23: @ %entry -; SOFT-NEXT: cmp r1, r5 -; SOFT-NEXT: bls .LBB45_31 -; SOFT-NEXT: .LBB45_24: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB45_32 -; SOFT-NEXT: .LBB45_25: @ %entry -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: .LBB45_9: @ %entry +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB45_11 +; SOFT-NEXT: .LBB45_10: @ %entry +; SOFT-NEXT: str r4, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB45_11: @ %entry +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB45_26: @ %entry -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB45_10 -; SOFT-NEXT: .LBB45_27: @ %entry -; SOFT-NEXT: mov r6, r2 +; SOFT-NEXT: .LBB45_12: @ %entry +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB45_11 -; SOFT-NEXT: b .LBB45_12 -; SOFT-NEXT: .LBB45_28: @ %entry -; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: bne .LBB45_2 +; SOFT-NEXT: .LBB45_13: @ %entry +; SOFT-NEXT: mov r3, r7 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bge .LBB45_18 -; SOFT-NEXT: .LBB45_29: @ %entry -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB45_19 -; SOFT-NEXT: b .LBB45_20 -; SOFT-NEXT: .LBB45_30: @ %entry -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: cmp r1, r5 -; SOFT-NEXT: bhi .LBB45_24 -; SOFT-NEXT: .LBB45_31: @ %entry -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB45_25 -; SOFT-NEXT: .LBB45_32: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: bne .LBB45_3 +; SOFT-NEXT: .LBB45_14: @ %entry +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB45_4 +; SOFT-NEXT: b .LBB45_5 +; SOFT-NEXT: .LBB45_15: @ %entry +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB45_9 +; SOFT-NEXT: .LBB45_16: @ %entry +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB45_10 +; SOFT-NEXT: b .LBB45_11 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.33: +; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI45_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; ; VFP2-LABEL: stest_f64i64_mm: ; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; VFP2-NEXT: .pad #4 -; VFP2-NEXT: sub sp, #4 +; VFP2-NEXT: .save {r4, r5, r7, lr} +; VFP2-NEXT: push {r4, r5, r7, lr} ; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: mvn r8, #-2147483648 -; VFP2-NEXT: mov r12, r1 -; VFP2-NEXT: cmp r1, r8 -; VFP2-NEXT: mvn r1, #-2147483648 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r1, r12 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: mvn r4, #-2147483648 -; VFP2-NEXT: mov.w r5, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r4, r12 -; VFP2-NEXT: orrs.w r9, r2, r3 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r4, r1 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r5, r3 -; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: mov.w r7, #-2147483648 -; VFP2-NEXT: mov.w r1, #-2147483648 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r7, r4 -; VFP2-NEXT: cmp.w r4, #-2147483648 -; VFP2-NEXT: mov r6, r3 -; VFP2-NEXT: it hi -; VFP2-NEXT: movhi r1, r4 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: andne.w r6, r2, r6, asr #31 -; VFP2-NEXT: and.w r2, r6, r5 -; VFP2-NEXT: mov.w r6, #-1 -; VFP2-NEXT: adds r2, #1 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r7 -; VFP2-NEXT: mov.w r7, #-1 -; VFP2-NEXT: cmp r12, r8 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r7, r0 -; VFP2-NEXT: mov.w lr, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r7, r0 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r0, 
r6 -; VFP2-NEXT: cmp.w r9, #0 -; VFP2-NEXT: mov.w r3, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r7 -; VFP2-NEXT: cmp.w r4, #-2147483648 -; VFP2-NEXT: it hi -; VFP2-NEXT: movhi r3, r0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r3, r0 -; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: it le -; VFP2-NEXT: movle r0, lr -; VFP2-NEXT: cmp r2, #0 +; VFP2-NEXT: subs.w r4, r0, #-1 +; VFP2-NEXT: mvn lr, #-2147483648 +; VFP2-NEXT: sbcs.w r4, r1, lr +; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: sbcs r4, r2, #0 +; VFP2-NEXT: sbcs r4, r3, #0 +; VFP2-NEXT: mov.w r4, #0 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt r4, #1 +; VFP2-NEXT: cmp r4, #0 +; VFP2-NEXT: itet eq +; VFP2-NEXT: moveq r3, r4 +; VFP2-NEXT: movne r4, r2 +; VFP2-NEXT: moveq r1, lr +; VFP2-NEXT: mov.w r2, #-1 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r3 -; VFP2-NEXT: add sp, #4 -; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: rsbs r5, r0, #0 +; VFP2-NEXT: mov.w lr, #-2147483648 +; VFP2-NEXT: sbcs.w r5, lr, r1 +; VFP2-NEXT: sbcs.w r4, r2, r4 +; VFP2-NEXT: sbcs r2, r3 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt.w r12, #1 +; VFP2-NEXT: cmp.w r12, #0 +; VFP2-NEXT: itt eq +; VFP2-NEXT: moveq r1, lr +; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f64i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: .pad #4 -; FULL-NEXT: sub sp, #4 +; FULL-NEXT: .save {r4, r5, r7, lr} +; FULL-NEXT: push {r4, r5, r7, lr} ; FULL-NEXT: bl __fixdfti +; FULL-NEXT: subs.w lr, r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: cmp r1, r12 -; FULL-NEXT: csel lr, r1, r12, lo -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r4, r1, r12, mi -; FULL-NEXT: orrs.w r8, r2, r3 -; FULL-NEXT: csel r4, lr, r4, eq -; FULL-NEXT: mov.w lr, #0 -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: mov.w r7, #-2147483648 -; FULL-NEXT: csel r6, r3, lr, mi -; FULL-NEXT: mov r5, r3 -; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: csel r9, r4, r7, gt -; FULL-NEXT: cmp.w r4, #-2147483648 -; FULL-NEXT: csel r7, r4, r7, hi -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it ne -; FULL-NEXT: andne.w r5, r2, r5, asr #31 -; FULL-NEXT: and.w r2, r5, r6 -; FULL-NEXT: adds r5, r2, #1 -; FULL-NEXT: csel r2, r7, r9, eq -; FULL-NEXT: mov.w r7, #-1 -; FULL-NEXT: cmp r1, r12 -; FULL-NEXT: csel r1, r0, r7, lo -; FULL-NEXT: csel r1, r0, r1, eq -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r0, r0, r7, mi -; FULL-NEXT: cmp.w r8, #0 -; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: cmp.w r4, #-2147483648 -; FULL-NEXT: csel r1, r0, lr, hi -; FULL-NEXT: csel r1, r0, r1, eq -; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: csel r0, r0, lr, gt -; FULL-NEXT: cmp r5, #0 -; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: mov r1, r2 -; FULL-NEXT: add sp, #4 -; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; FULL-NEXT: sbcs.w lr, r1, r12 +; FULL-NEXT: sbcs lr, r2, #0 +; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: cset lr, lt +; FULL-NEXT: cmp.w lr, #0 +; FULL-NEXT: csel r5, r3, lr, ne +; FULL-NEXT: mov.w r3, #-1 +; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r2, r2, lr, ne +; FULL-NEXT: rsbs r4, r0, #0 +; FULL-NEXT: mov.w r12, #-2147483648 +; FULL-NEXT: sbcs.w r4, r12, r1 +; FULL-NEXT: sbcs.w r2, r3, r2 +; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: cset r2, lt +; FULL-NEXT: cmp r2, #0 +; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 
@llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3338,73 +3128,46 @@ entry: define i64 @utest_f64i64_mm(double %x) { ; SOFT-LABEL: utest_f64i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, lr} -; SOFT-NEXT: push {r4, r5, r6, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __fixunsdfti -; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: eors r4, r5 -; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: movs r4, #0 ; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: sbcs r2, r6 -; SOFT-NEXT: blo .LBB46_2 +; SOFT-NEXT: sbcs r3, r4 +; SOFT-NEXT: blo .LBB46_4 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r5, r6 -; SOFT-NEXT: .LBB46_2: @ %entry -; SOFT-NEXT: orrs r4, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB46_7 -; SOFT-NEXT: @ %bb.3: @ %entry ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB46_8 -; SOFT-NEXT: .LBB46_4: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB46_9 -; SOFT-NEXT: .LBB46_5: @ %entry +; SOFT-NEXT: beq .LBB46_5 +; SOFT-NEXT: .LBB46_2: @ %entry ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB46_10 -; SOFT-NEXT: .LBB46_6: @ %entry -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB46_7: @ %entry -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: beq .LBB46_6 +; SOFT-NEXT: .LBB46_3: @ %entry +; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: .LBB46_4: +; SOFT-NEXT: movs r4, #1 ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB46_4 -; SOFT-NEXT: .LBB46_8: @ %entry +; SOFT-NEXT: bne .LBB46_2 +; SOFT-NEXT: .LBB46_5: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB46_5 -; SOFT-NEXT: .LBB46_9: @ %entry -; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB46_6 -; SOFT-NEXT: .LBB46_10: @ %entry +; SOFT-NEXT: bne .LBB46_3 +; SOFT-NEXT: .LBB46_6: @ %entry ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, r5, r6, pc} +; SOFT-NEXT: pop {r4, pc} ; ; VFP2-LABEL: utest_f64i64_mm: ; VFP2: @ %bb.0: @ %entry ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixunsdfti -; VFP2-NEXT: eor r12, r2, #1 ; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: it lo -; VFP2-NEXT: movlo.w lr, #1 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: orr.w r12, r12, r3 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, lr +; VFP2-NEXT: movlo.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq +; VFP2-NEXT: itt eq ; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, r12 ; VFP2-NEXT: pop {r7, pc} ; @@ -3413,19 +3176,12 @@ define i64 @utest_f64i64_mm(double %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixunsdfti -; FULL-NEXT: eor r12, r2, #1 ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: orr.w r12, r12, r3 ; FULL-NEXT: cset r2, lo ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r0, r0, r12, ne -; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: pop {r7, pc} entry: %conv = fptoui double %x to i128 @@ -3437,108 +3193,78 @@ entry: define i64 @ustest_f64i64_mm(double %x) { ; SOFT-LABEL: ustest_f64i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, r7, lr} -; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; 
SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: movs r7, #1 -; SOFT-NEXT: mov r6, r2 -; SOFT-NEXT: eors r6, r7 -; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: movs r1, #0 ; SOFT-NEXT: subs r2, r2, #1 ; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: sbcs r2, r5 +; SOFT-NEXT: sbcs r2, r1 ; SOFT-NEXT: blt .LBB47_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: .LBB47_2: @ %entry -; SOFT-NEXT: orrs r6, r3 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB47_12 -; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB47_13 -; SOFT-NEXT: .LBB47_4: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB47_6 -; SOFT-NEXT: .LBB47_5: @ %entry +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: beq .LBB47_3 +; SOFT-NEXT: b .LBB47_4 +; SOFT-NEXT: .LBB47_2: +; SOFT-NEXT: movs r5, #1 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: bne .LBB47_4 +; SOFT-NEXT: .LBB47_3: @ %entry ; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: .LBB47_4: @ %entry +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: bne .LBB47_6 +; SOFT-NEXT: @ %bb.5: @ %entry +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: .LBB47_6: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: bpl .LBB47_14 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: bpl .LBB47_10 ; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB47_15 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: beq .LBB47_11 ; SOFT-NEXT: .LBB47_8: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB47_16 -; SOFT-NEXT: .LBB47_9: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB47_11 +; SOFT-NEXT: bpl .LBB47_12 +; SOFT-NEXT: .LBB47_9: @ %entry +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB47_10: @ %entry -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: .LBB47_11: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB47_12: @ %entry -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB47_4 -; SOFT-NEXT: .LBB47_13: @ %entry -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB47_5 -; SOFT-NEXT: b .LBB47_6 -; SOFT-NEXT: .LBB47_14: @ %entry -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: bne .LBB47_8 -; SOFT-NEXT: .LBB47_15: @ %entry -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB47_9 -; SOFT-NEXT: .LBB47_16: @ %entry -; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: .LBB47_11: @ %entry +; SOFT-NEXT: mov r4, r5 ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB47_10 -; SOFT-NEXT: b .LBB47_11 +; SOFT-NEXT: bmi .LBB47_9 +; SOFT-NEXT: .LBB47_12: @ %entry +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; ; VFP2-LABEL: ustest_f64i64_mm: ; VFP2: @ %bb.0: @ %entry ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: eor r12, r2, #1 ; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: orr.w r12, r12, r3 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: movlt.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq +; VFP2-NEXT: itt eq ; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: moveq r3, r12 ; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi -; VFP2-NEXT: movmi lr, r3 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r0, #0 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: 
moveq r1, r2 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r1, #0 ; VFP2-NEXT: pop {r7, pc} @@ -3548,25 +3274,17 @@ define i64 @ustest_f64i64_mm(double %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixdfti -; FULL-NEXT: eor r12, r2, #1 ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: orr.w r12, r12, r3 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 +; FULL-NEXT: csel r3, r3, r2, ne ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r0, r0, r12, ne -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it pl -; FULL-NEXT: movpl r3, #0 ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: it mi ; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: it mi ; FULL-NEXT: movmi r1, #0 @@ -3584,246 +3302,150 @@ define i64 @stest_f32i64_mm(float %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #20 -; SOFT-NEXT: sub sp, #20 +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: ldr r0, .LCPI48_0 -; SOFT-NEXT: cmp r1, r0 -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: blo .LBB48_2 +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: ldr r6, .LCPI48_0 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: sbcs r0, r6 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: bge .LBB48_12 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: ldr r5, .LCPI48_0 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB48_13 ; SOFT-NEXT: .LBB48_2: @ %entry ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: bmi .LBB48_4 -; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: ldr r1, .LCPI48_0 +; SOFT-NEXT: beq .LBB48_14 +; SOFT-NEXT: .LBB48_3: @ %entry +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB48_5 ; SOFT-NEXT: .LBB48_4: @ %entry -; SOFT-NEXT: str r2, [sp] @ 4-byte Spill -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: orrs r0, r7 -; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: beq .LBB48_6 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: .LBB48_6: @ %entry -; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: .LBB48_5: @ %entry +; SOFT-NEXT: mvns r0, r5 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: bmi .LBB48_8 -; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB48_8: @ %entry -; SOFT-NEXT: movs r1, #1 -; SOFT-NEXT: lsls r1, r1, #31 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: bge .LBB48_10 -; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: bne .LBB48_7 +; SOFT-NEXT: @ %bb.6: @ %entry +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB48_7: @ %entry +; SOFT-NEXT: lsls r6, r4, #31 +; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload +; SOFT-NEXT: rsbs r7, r7, #0 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: sbcs r7, r1 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: sbcs r7, r2 +; SOFT-NEXT: sbcs r0, r3 +; SOFT-NEXT: bge .LBB48_15 +; SOFT-NEXT: @ %bb.8: @ %entry +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB48_16 +; 
SOFT-NEXT: .LBB48_9: @ %entry +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB48_11 ; SOFT-NEXT: .LBB48_10: @ %entry -; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: bhi .LBB48_12 -; SOFT-NEXT: @ %bb.11: @ %entry -; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: str r4, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB48_11: @ %entry +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB48_12: @ %entry -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB48_14 -; SOFT-NEXT: @ %bb.13: @ %entry +; SOFT-NEXT: bne .LBB48_2 +; SOFT-NEXT: .LBB48_13: @ %entry ; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: b .LBB48_15 -; SOFT-NEXT: .LBB48_14: -; SOFT-NEXT: asrs r3, r7, #31 -; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload -; SOFT-NEXT: ands r3, r0 -; SOFT-NEXT: .LBB48_15: @ %entry -; SOFT-NEXT: ands r3, r2 -; SOFT-NEXT: adds r0, r3, #1 -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: beq .LBB48_17 -; SOFT-NEXT: @ %bb.16: @ %entry -; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: .LBB48_17: @ %entry -; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: mvns r6, r3 -; SOFT-NEXT: ldr r0, .LCPI48_0 -; SOFT-NEXT: cmp r4, r0 -; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: blo .LBB48_19 -; SOFT-NEXT: @ %bb.18: @ %entry -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: .LBB48_19: @ %entry -; SOFT-NEXT: cmp r4, r0 -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: beq .LBB48_21 -; SOFT-NEXT: @ %bb.20: @ %entry -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: .LBB48_21: @ %entry ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bmi .LBB48_23 -; SOFT-NEXT: @ %bb.22: @ %entry -; SOFT-NEXT: str r6, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: .LBB48_23: @ %entry -; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB48_25 -; SOFT-NEXT: @ %bb.24: @ %entry -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: .LBB48_25: @ %entry -; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bhi .LBB48_27 -; SOFT-NEXT: @ %bb.26: @ %entry -; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB48_27: @ %entry -; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB48_29 -; SOFT-NEXT: @ %bb.28: @ %entry -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: .LBB48_29: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bge .LBB48_31 -; SOFT-NEXT: @ %bb.30: @ %entry -; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB48_31: @ %entry -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: beq .LBB48_33 -; SOFT-NEXT: @ %bb.32: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB48_33: @ %entry -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: bne .LBB48_3 +; SOFT-NEXT: .LBB48_14: @ %entry +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB48_4 +; SOFT-NEXT: b .LBB48_5 +; SOFT-NEXT: .LBB48_15: @ %entry +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB48_9 +; SOFT-NEXT: .LBB48_16: @ %entry +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB48_10 +; SOFT-NEXT: b .LBB48_11 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.34: +; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI48_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; ; VFP2-LABEL: stest_f32i64_mm: ; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; 
VFP2-NEXT: .pad #4 -; VFP2-NEXT: sub sp, #4 +; VFP2-NEXT: .save {r4, r5, r7, lr} +; VFP2-NEXT: push {r4, r5, r7, lr} ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: mvn r8, #-2147483648 -; VFP2-NEXT: mov r12, r1 -; VFP2-NEXT: cmp r1, r8 -; VFP2-NEXT: mvn r1, #-2147483648 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r1, r12 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: mvn r4, #-2147483648 -; VFP2-NEXT: mov.w r5, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r4, r12 -; VFP2-NEXT: orrs.w r9, r2, r3 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r4, r1 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r5, r3 -; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: mov.w r7, #-2147483648 -; VFP2-NEXT: mov.w r1, #-2147483648 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r7, r4 -; VFP2-NEXT: cmp.w r4, #-2147483648 -; VFP2-NEXT: mov r6, r3 -; VFP2-NEXT: it hi -; VFP2-NEXT: movhi r1, r4 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: andne.w r6, r2, r6, asr #31 -; VFP2-NEXT: and.w r2, r6, r5 -; VFP2-NEXT: mov.w r6, #-1 -; VFP2-NEXT: adds r2, #1 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r7 -; VFP2-NEXT: mov.w r7, #-1 -; VFP2-NEXT: cmp r12, r8 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r7, r0 -; VFP2-NEXT: mov.w lr, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r7, r0 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r0, r6 -; VFP2-NEXT: cmp.w r9, #0 -; VFP2-NEXT: mov.w r3, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r7 -; VFP2-NEXT: cmp.w r4, #-2147483648 -; VFP2-NEXT: it hi -; VFP2-NEXT: movhi r3, r0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r3, r0 -; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: it le -; VFP2-NEXT: movle r0, lr -; VFP2-NEXT: cmp r2, #0 +; VFP2-NEXT: subs.w r4, r0, #-1 +; VFP2-NEXT: mvn lr, #-2147483648 +; VFP2-NEXT: sbcs.w r4, r1, lr +; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: sbcs r4, r2, #0 +; VFP2-NEXT: sbcs r4, r3, #0 +; VFP2-NEXT: mov.w r4, #0 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt r4, #1 +; VFP2-NEXT: cmp r4, #0 +; VFP2-NEXT: itet eq +; VFP2-NEXT: moveq r3, r4 +; VFP2-NEXT: movne r4, r2 +; VFP2-NEXT: moveq r1, lr +; VFP2-NEXT: mov.w r2, #-1 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r3 -; VFP2-NEXT: add sp, #4 -; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: rsbs r5, r0, #0 +; VFP2-NEXT: mov.w lr, #-2147483648 +; VFP2-NEXT: sbcs.w r5, lr, r1 +; VFP2-NEXT: sbcs.w r4, r2, r4 +; VFP2-NEXT: sbcs r2, r3 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt.w r12, #1 +; VFP2-NEXT: cmp.w r12, #0 +; VFP2-NEXT: itt eq +; VFP2-NEXT: moveq r1, lr +; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f32i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: .pad #4 -; FULL-NEXT: sub sp, #4 +; FULL-NEXT: .save {r4, r5, r7, lr} +; FULL-NEXT: push {r4, r5, r7, lr} ; FULL-NEXT: bl __fixsfti +; FULL-NEXT: subs.w lr, r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: cmp r1, r12 -; FULL-NEXT: csel lr, r1, r12, lo -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r4, r1, r12, mi -; FULL-NEXT: orrs.w r8, r2, r3 -; FULL-NEXT: csel r4, lr, r4, eq -; FULL-NEXT: mov.w lr, #0 -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: mov.w r7, #-2147483648 -; FULL-NEXT: csel r6, r3, lr, mi -; FULL-NEXT: mov r5, r3 -; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: csel r9, r4, r7, gt -; FULL-NEXT: cmp.w r4, #-2147483648 -; FULL-NEXT: csel r7, r4, r7, hi -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it ne -; FULL-NEXT: andne.w r5, r2, r5, asr #31 -; FULL-NEXT: and.w r2, r5, r6 -; FULL-NEXT: adds r5, r2, #1 
-; FULL-NEXT: csel r2, r7, r9, eq -; FULL-NEXT: mov.w r7, #-1 -; FULL-NEXT: cmp r1, r12 -; FULL-NEXT: csel r1, r0, r7, lo -; FULL-NEXT: csel r1, r0, r1, eq -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r0, r0, r7, mi -; FULL-NEXT: cmp.w r8, #0 -; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: cmp.w r4, #-2147483648 -; FULL-NEXT: csel r1, r0, lr, hi -; FULL-NEXT: csel r1, r0, r1, eq -; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: csel r0, r0, lr, gt -; FULL-NEXT: cmp r5, #0 -; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: mov r1, r2 -; FULL-NEXT: add sp, #4 -; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; FULL-NEXT: sbcs.w lr, r1, r12 +; FULL-NEXT: sbcs lr, r2, #0 +; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: cset lr, lt +; FULL-NEXT: cmp.w lr, #0 +; FULL-NEXT: csel r5, r3, lr, ne +; FULL-NEXT: mov.w r3, #-1 +; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r2, r2, lr, ne +; FULL-NEXT: rsbs r4, r0, #0 +; FULL-NEXT: mov.w r12, #-2147483648 +; FULL-NEXT: sbcs.w r4, r12, r1 +; FULL-NEXT: sbcs.w r2, r3, r2 +; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: cset r2, lt +; FULL-NEXT: cmp r2, #0 +; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi float %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3835,70 +3457,46 @@ entry: define i64 @utest_f32i64_mm(float %x) { ; SOFT-LABEL: utest_f32i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, lr} -; SOFT-NEXT: push {r4, r5, r6, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: subs r4, r2, #1 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r6 -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: blo .LBB49_2 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: sbcs r3, r4 +; SOFT-NEXT: blo .LBB49_4 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB49_5 ; SOFT-NEXT: .LBB49_2: @ %entry ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB49_4 -; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: beq .LBB49_6 +; SOFT-NEXT: .LBB49_3: @ %entry +; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: .LBB49_4: +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB49_2 +; SOFT-NEXT: .LBB49_5: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB49_4: @ %entry -; SOFT-NEXT: eors r2, r5 -; SOFT-NEXT: orrs r2, r3 -; SOFT-NEXT: beq .LBB49_8 -; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB49_9 +; SOFT-NEXT: bne .LBB49_3 ; SOFT-NEXT: .LBB49_6: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB49_10 -; SOFT-NEXT: .LBB49_7: @ %entry -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB49_8: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB49_6 -; SOFT-NEXT: .LBB49_9: @ %entry ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB49_7 -; SOFT-NEXT: .LBB49_10: @ %entry -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: pop {r4, r5, r6, pc} +; SOFT-NEXT: pop {r4, pc} ; ; VFP2-LABEL: utest_f32i64_mm: ; VFP2: @ %bb.0: @ %entry ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: eor r12, r2, #1 ; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: it lo -; VFP2-NEXT: movlo.w lr, #1 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: orr.w r12, r12, r3 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, 
lr +; VFP2-NEXT: movlo.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq +; VFP2-NEXT: itt eq ; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, r12 ; VFP2-NEXT: pop {r7, pc} ; @@ -3907,19 +3505,12 @@ define i64 @utest_f32i64_mm(float %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixunssfti -; FULL-NEXT: eor r12, r2, #1 ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: orr.w r12, r12, r3 ; FULL-NEXT: cset r2, lo ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r0, r0, r12, ne -; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: pop {r7, pc} entry: %conv = fptoui float %x to i128 @@ -3931,100 +3522,78 @@ entry: define i64 @ustest_f32i64_mm(float %x) { ; SOFT-LABEL: ustest_f32i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, lr} -; SOFT-NEXT: push {r4, r5, r6, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; SOFT-NEXT: bl __fixsfti ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: movs r5, #1 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: subs r6, r2, #1 -; SOFT-NEXT: mov r6, r3 -; SOFT-NEXT: sbcs r6, r1 -; SOFT-NEXT: mov r6, r5 +; SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: sbcs r2, r1 ; SOFT-NEXT: blt .LBB50_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r6, r1 -; SOFT-NEXT: .LBB50_2: @ %entry -; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: beq .LBB50_3 +; SOFT-NEXT: b .LBB50_4 +; SOFT-NEXT: .LBB50_2: +; SOFT-NEXT: movs r5, #1 +; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: bne .LBB50_4 -; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: .LBB50_3: @ %entry +; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: .LBB50_4: @ %entry -; SOFT-NEXT: eors r2, r5 -; SOFT-NEXT: orrs r2, r3 +; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: bne .LBB50_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: .LBB50_6: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB50_8 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: bpl .LBB50_10 ; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: beq .LBB50_11 ; SOFT-NEXT: .LBB50_8: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: bpl .LBB50_13 -; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB50_14 +; SOFT-NEXT: bpl .LBB50_12 +; SOFT-NEXT: .LBB50_9: @ %entry +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB50_10: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB50_15 +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: bne .LBB50_8 ; SOFT-NEXT: .LBB50_11: @ %entry +; SOFT-NEXT: mov r4, r5 ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB50_16 +; SOFT-NEXT: bmi .LBB50_9 ; SOFT-NEXT: .LBB50_12: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB50_13: @ %entry -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB50_10 -; SOFT-NEXT: .LBB50_14: @ %entry -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB50_11 -; SOFT-NEXT: .LBB50_15: @ %entry -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB50_12 -; SOFT-NEXT: .LBB50_16: @ %entry ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: pop {r4, r5, r6, pc} +; 
SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; ; VFP2-LABEL: ustest_f32i64_mm: ; VFP2: @ %bb.0: @ %entry ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: eor r12, r2, #1 ; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: orr.w r12, r12, r3 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: movlt.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq +; VFP2-NEXT: itt eq ; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi lr, r3 -; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: moveq r3, r12 +; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r0, #0 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, r2 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r1, #0 ; VFP2-NEXT: pop {r7, pc} @@ -4034,25 +3603,17 @@ define i64 @ustest_f32i64_mm(float %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixsfti -; FULL-NEXT: eor r12, r2, #1 ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: orr.w r12, r12, r3 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 +; FULL-NEXT: csel r3, r3, r2, ne ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r0, r0, r12, ne -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it pl -; FULL-NEXT: movpl r3, #0 ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: it mi ; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: it mi ; FULL-NEXT: movmi r1, #0 @@ -4070,253 +3631,157 @@ define i64 @stest_f16i64_mm(half %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #20 -; SOFT-NEXT: sub sp, #20 +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: ldr r0, .LCPI51_0 -; SOFT-NEXT: cmp r1, r0 -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: blo .LBB51_2 +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: ldr r6, .LCPI51_0 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: sbcs r0, r6 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: bge .LBB51_12 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: ldr r5, .LCPI51_0 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB51_13 ; SOFT-NEXT: .LBB51_2: @ %entry ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: bmi .LBB51_4 -; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: ldr r1, .LCPI51_0 +; SOFT-NEXT: beq .LBB51_14 +; SOFT-NEXT: .LBB51_3: @ %entry +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB51_5 ; SOFT-NEXT: .LBB51_4: @ %entry -; SOFT-NEXT: str r2, [sp] @ 4-byte Spill -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: orrs r0, r7 -; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: beq .LBB51_6 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: .LBB51_6: @ %entry -; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; 
SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: .LBB51_5: @ %entry +; SOFT-NEXT: mvns r0, r5 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: bmi .LBB51_8 -; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB51_8: @ %entry -; SOFT-NEXT: movs r1, #1 -; SOFT-NEXT: lsls r1, r1, #31 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: bge .LBB51_10 -; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: bne .LBB51_7 +; SOFT-NEXT: @ %bb.6: @ %entry +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB51_7: @ %entry +; SOFT-NEXT: lsls r6, r4, #31 +; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload +; SOFT-NEXT: rsbs r7, r7, #0 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: sbcs r7, r1 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: sbcs r7, r2 +; SOFT-NEXT: sbcs r0, r3 +; SOFT-NEXT: bge .LBB51_15 +; SOFT-NEXT: @ %bb.8: @ %entry +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB51_16 +; SOFT-NEXT: .LBB51_9: @ %entry +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB51_11 ; SOFT-NEXT: .LBB51_10: @ %entry -; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: bhi .LBB51_12 -; SOFT-NEXT: @ %bb.11: @ %entry -; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: str r4, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB51_11: @ %entry +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB51_12: @ %entry -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB51_14 -; SOFT-NEXT: @ %bb.13: @ %entry +; SOFT-NEXT: bne .LBB51_2 +; SOFT-NEXT: .LBB51_13: @ %entry ; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: b .LBB51_15 -; SOFT-NEXT: .LBB51_14: -; SOFT-NEXT: asrs r3, r7, #31 -; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload -; SOFT-NEXT: ands r3, r0 -; SOFT-NEXT: .LBB51_15: @ %entry -; SOFT-NEXT: ands r3, r2 -; SOFT-NEXT: adds r0, r3, #1 -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: beq .LBB51_17 -; SOFT-NEXT: @ %bb.16: @ %entry -; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: .LBB51_17: @ %entry -; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: mvns r6, r3 -; SOFT-NEXT: ldr r0, .LCPI51_0 -; SOFT-NEXT: cmp r4, r0 -; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: blo .LBB51_19 -; SOFT-NEXT: @ %bb.18: @ %entry -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: .LBB51_19: @ %entry -; SOFT-NEXT: cmp r4, r0 -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: beq .LBB51_21 -; SOFT-NEXT: @ %bb.20: @ %entry -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: .LBB51_21: @ %entry ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bmi .LBB51_23 -; SOFT-NEXT: @ %bb.22: @ %entry -; SOFT-NEXT: str r6, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: .LBB51_23: @ %entry -; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB51_25 -; SOFT-NEXT: @ %bb.24: @ %entry -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: .LBB51_25: @ %entry -; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bhi .LBB51_27 -; SOFT-NEXT: @ %bb.26: @ %entry -; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB51_27: @ %entry -; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB51_29 -; SOFT-NEXT: @ %bb.28: @ %entry -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: .LBB51_29: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bge .LBB51_31 -; SOFT-NEXT: @ %bb.30: @ %entry -; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB51_31: @ %entry -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; 
SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: beq .LBB51_33 -; SOFT-NEXT: @ %bb.32: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB51_33: @ %entry -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: bne .LBB51_3 +; SOFT-NEXT: .LBB51_14: @ %entry +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB51_4 +; SOFT-NEXT: b .LBB51_5 +; SOFT-NEXT: .LBB51_15: @ %entry +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB51_9 +; SOFT-NEXT: .LBB51_16: @ %entry +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB51_10 +; SOFT-NEXT: b .LBB51_11 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.34: +; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI51_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; ; VFP2-LABEL: stest_f16i64_mm: ; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; VFP2-NEXT: .pad #4 -; VFP2-NEXT: sub sp, #4 +; VFP2-NEXT: .save {r4, r5, r7, lr} +; VFP2-NEXT: push {r4, r5, r7, lr} ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: mvn r8, #-2147483648 -; VFP2-NEXT: mov r12, r1 -; VFP2-NEXT: cmp r1, r8 -; VFP2-NEXT: mvn r1, #-2147483648 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r1, r12 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: mvn r4, #-2147483648 -; VFP2-NEXT: mov.w r5, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r4, r12 -; VFP2-NEXT: orrs.w r9, r2, r3 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r4, r1 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r5, r3 -; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: mov.w r7, #-2147483648 -; VFP2-NEXT: mov.w r1, #-2147483648 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r7, r4 -; VFP2-NEXT: cmp.w r4, #-2147483648 -; VFP2-NEXT: mov r6, r3 -; VFP2-NEXT: it hi -; VFP2-NEXT: movhi r1, r4 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: andne.w r6, r2, r6, asr #31 -; VFP2-NEXT: and.w r2, r6, r5 -; VFP2-NEXT: mov.w r6, #-1 -; VFP2-NEXT: adds r2, #1 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r7 -; VFP2-NEXT: mov.w r7, #-1 -; VFP2-NEXT: cmp r12, r8 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r7, r0 -; VFP2-NEXT: mov.w lr, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r7, r0 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r0, r6 -; VFP2-NEXT: cmp.w r9, #0 -; VFP2-NEXT: mov.w r3, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r7 -; VFP2-NEXT: cmp.w r4, #-2147483648 -; VFP2-NEXT: it hi -; VFP2-NEXT: movhi r3, r0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r3, r0 -; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: it le -; VFP2-NEXT: movle r0, lr -; VFP2-NEXT: cmp r2, #0 +; VFP2-NEXT: subs.w r4, r0, #-1 +; VFP2-NEXT: mvn lr, #-2147483648 +; VFP2-NEXT: sbcs.w r4, r1, lr +; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: sbcs r4, r2, #0 +; VFP2-NEXT: sbcs r4, r3, #0 +; VFP2-NEXT: mov.w r4, #0 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt r4, #1 +; VFP2-NEXT: cmp r4, #0 +; VFP2-NEXT: itet eq +; VFP2-NEXT: moveq r3, r4 +; VFP2-NEXT: movne r4, r2 +; VFP2-NEXT: moveq r1, lr +; VFP2-NEXT: mov.w r2, #-1 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r3 -; VFP2-NEXT: add sp, #4 -; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: rsbs r5, r0, #0 +; VFP2-NEXT: mov.w lr, #-2147483648 +; VFP2-NEXT: sbcs.w r5, lr, r1 +; VFP2-NEXT: sbcs.w r4, r2, r4 +; VFP2-NEXT: sbcs r2, r3 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt.w r12, #1 +; VFP2-NEXT: cmp.w r12, #0 +; VFP2-NEXT: itt eq +; VFP2-NEXT: moveq r1, lr +; VFP2-NEXT: moveq r0, r12 +; 
VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f16i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: .pad #4 -; FULL-NEXT: sub sp, #4 +; FULL-NEXT: .save {r4, r5, r7, lr} +; FULL-NEXT: push {r4, r5, r7, lr} ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti +; FULL-NEXT: subs.w lr, r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: cmp r1, r12 -; FULL-NEXT: csel lr, r1, r12, lo -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r4, r1, r12, mi -; FULL-NEXT: orrs.w r8, r2, r3 -; FULL-NEXT: csel r4, lr, r4, eq -; FULL-NEXT: mov.w lr, #0 -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: mov.w r7, #-2147483648 -; FULL-NEXT: csel r6, r3, lr, mi -; FULL-NEXT: mov r5, r3 -; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: csel r9, r4, r7, gt -; FULL-NEXT: cmp.w r4, #-2147483648 -; FULL-NEXT: csel r7, r4, r7, hi -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it ne -; FULL-NEXT: andne.w r5, r2, r5, asr #31 -; FULL-NEXT: and.w r2, r5, r6 -; FULL-NEXT: adds r5, r2, #1 -; FULL-NEXT: csel r2, r7, r9, eq -; FULL-NEXT: mov.w r7, #-1 -; FULL-NEXT: cmp r1, r12 -; FULL-NEXT: csel r1, r0, r7, lo -; FULL-NEXT: csel r1, r0, r1, eq -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r0, r0, r7, mi -; FULL-NEXT: cmp.w r8, #0 -; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: cmp.w r4, #-2147483648 -; FULL-NEXT: csel r1, r0, lr, hi -; FULL-NEXT: csel r1, r0, r1, eq -; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: csel r0, r0, lr, gt -; FULL-NEXT: cmp r5, #0 -; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: mov r1, r2 -; FULL-NEXT: add sp, #4 -; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; FULL-NEXT: sbcs.w lr, r1, r12 +; FULL-NEXT: sbcs lr, r2, #0 +; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: cset lr, lt +; FULL-NEXT: cmp.w lr, #0 +; FULL-NEXT: csel r5, r3, lr, ne +; FULL-NEXT: mov.w r3, #-1 +; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r2, r2, lr, ne +; FULL-NEXT: rsbs r4, r0, #0 +; FULL-NEXT: mov.w r12, #-2147483648 +; FULL-NEXT: sbcs.w r4, r12, r1 +; FULL-NEXT: sbcs.w r2, r3, r2 +; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: cset r2, lt +; FULL-NEXT: cmp r2, #0 +; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -4328,48 +3793,34 @@ entry: define i64 @utesth_f16i64_mm(half %x) { ; SOFT-LABEL: utesth_f16i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, lr} -; SOFT-NEXT: push {r4, r5, r6, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: subs r4, r2, #1 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r6 -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: blo .LBB52_2 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: sbcs r3, r4 +; SOFT-NEXT: blo .LBB52_4 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB52_5 ; SOFT-NEXT: .LBB52_2: @ %entry ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB52_4 -; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: beq .LBB52_6 +; SOFT-NEXT: .LBB52_3: @ %entry +; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: .LBB52_4: +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB52_2 +; SOFT-NEXT: .LBB52_5: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: 
.LBB52_4: @ %entry -; SOFT-NEXT: eors r2, r5 -; SOFT-NEXT: orrs r2, r3 -; SOFT-NEXT: beq .LBB52_8 -; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB52_9 +; SOFT-NEXT: bne .LBB52_3 ; SOFT-NEXT: .LBB52_6: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB52_10 -; SOFT-NEXT: .LBB52_7: @ %entry -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB52_8: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB52_6 -; SOFT-NEXT: .LBB52_9: @ %entry ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB52_7 -; SOFT-NEXT: .LBB52_10: @ %entry -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: pop {r4, r5, r6, pc} +; SOFT-NEXT: pop {r4, pc} ; ; VFP2-LABEL: utesth_f16i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -4379,24 +3830,14 @@ define i64 @utesth_f16i64_mm(half %x) { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: eor r12, r2, #1 ; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: it lo -; VFP2-NEXT: movlo.w lr, #1 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: orr.w r12, r12, r3 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, lr +; VFP2-NEXT: movlo.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq +; VFP2-NEXT: itt eq ; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, r12 ; VFP2-NEXT: pop {r7, pc} ; @@ -4407,19 +3848,12 @@ define i64 @utesth_f16i64_mm(half %x) { ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixunshfti -; FULL-NEXT: eor r12, r2, #1 ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: orr.w r12, r12, r3 ; FULL-NEXT: cset r2, lo ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r0, r0, r12, ne -; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: pop {r7, pc} entry: %conv = fptoui half %x to i128 @@ -4431,69 +3865,58 @@ entry: define i64 @ustest_f16i64_mm(half %x) { ; SOFT-LABEL: ustest_f16i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, lr} -; SOFT-NEXT: push {r4, r5, r6, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixsfti ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: movs r5, #1 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: subs r6, r2, #1 -; SOFT-NEXT: mov r6, r3 -; SOFT-NEXT: sbcs r6, r1 -; SOFT-NEXT: mov r6, r5 +; SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: sbcs r2, r1 ; SOFT-NEXT: blt .LBB53_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r6, r1 -; SOFT-NEXT: .LBB53_2: @ %entry -; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: beq .LBB53_3 +; SOFT-NEXT: b .LBB53_4 +; SOFT-NEXT: .LBB53_2: +; SOFT-NEXT: movs r5, #1 +; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: bne .LBB53_4 -; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: .LBB53_3: @ %entry +; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: .LBB53_4: @ %entry -; SOFT-NEXT: eors r2, r5 -; SOFT-NEXT: orrs r2, r3 +; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: bne .LBB53_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: .LBB53_6: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB53_8 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: bpl .LBB53_10 ; SOFT-NEXT: @ %bb.7: @ %entry 
-; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: beq .LBB53_11 ; SOFT-NEXT: .LBB53_8: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: bpl .LBB53_13 -; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB53_14 +; SOFT-NEXT: bpl .LBB53_12 +; SOFT-NEXT: .LBB53_9: @ %entry +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB53_10: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB53_15 +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: bne .LBB53_8 ; SOFT-NEXT: .LBB53_11: @ %entry +; SOFT-NEXT: mov r4, r5 ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB53_16 +; SOFT-NEXT: bmi .LBB53_9 ; SOFT-NEXT: .LBB53_12: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB53_13: @ %entry -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB53_10 -; SOFT-NEXT: .LBB53_14: @ %entry -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB53_11 -; SOFT-NEXT: .LBB53_15: @ %entry -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB53_12 -; SOFT-NEXT: .LBB53_16: @ %entry ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: pop {r4, r5, r6, pc} +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; ; VFP2-LABEL: ustest_f16i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -4503,33 +3926,22 @@ define i64 @ustest_f16i64_mm(half %x) { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: eor r12, r2, #1 ; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: orr.w r12, r12, r3 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: movlt.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq +; VFP2-NEXT: itt eq ; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: moveq r3, r12 ; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi -; VFP2-NEXT: movmi lr, r3 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r0, #0 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, r2 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r1, #0 ; VFP2-NEXT: pop {r7, pc} @@ -4541,25 +3953,17 @@ define i64 @ustest_f16i64_mm(half %x) { ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti -; FULL-NEXT: eor r12, r2, #1 ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: orr.w r12, r12, r3 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 +; FULL-NEXT: csel r3, r3, r2, ne ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r0, r0, r12, ne -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it pl -; FULL-NEXT: movpl r3, #0 ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: it mi ; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: it mi ; FULL-NEXT: movmi r1, #0 diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 9d5d5d1..7809008 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -2372,8 +2372,8 @@ entry: define <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-LABEL: stest_f64i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, 
r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 @@ -2381,43 +2381,35 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov r0, r2, d9 -; CHECK-NEXT: cmn r4, #-2147483647 -; CHECK-NEXT: mvn r3, #-2147483648 -; CHECK-NEXT: movlo r3, r4 ; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: subs r3, r4, r5 +; CHECK-NEXT: sbcs r3, r1, #0 +; CHECK-NEXT: mvn r7, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movne r3, r1 +; CHECK-NEXT: moveq r4, r5 +; CHECK-NEXT: rsbs r1, r4, #-2147483648 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movpl r4, r5 -; CHECK-NEXT: movpl r1, r6 -; CHECK-NEXT: moveq r4, r3 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r3, #-2147483648 -; CHECK-NEXT: mov r7, #-2147483648 -; CHECK-NEXT: movgt r3, r4 -; CHECK-NEXT: cmp r4, #-2147483648 -; CHECK-NEXT: movls r4, r7 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: movne r4, r3 +; CHECK-NEXT: sbcs r1, r7, r3 +; CHECK-NEXT: movge r4, r8 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: cmn r0, #-2147483647 -; CHECK-NEXT: mvn r2, #-2147483648 -; CHECK-NEXT: movlo r2, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movmi r5, r0 -; CHECK-NEXT: movmi r6, r1 -; CHECK-NEXT: moveq r5, r2 -; CHECK-NEXT: cmn r6, #1 -; CHECK-NEXT: mov r0, #-2147483648 +; CHECK-NEXT: subs r2, r0, r5 ; CHECK-NEXT: vmov.32 d0[0], r4 -; CHECK-NEXT: movgt r0, r5 -; CHECK-NEXT: cmp r5, #-2147483648 -; CHECK-NEXT: movls r5, r7 -; CHECK-NEXT: cmn r6, #1 +; CHECK-NEXT: sbcs r2, r1, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: movne r6, r1 ; CHECK-NEXT: movne r5, r0 +; CHECK-NEXT: rsbs r0, r5, #-2147483648 +; CHECK-NEXT: sbcs r0, r7, r6 +; CHECK-NEXT: movge r5, r8 ; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x double> %x to <2 x i64> %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> ) @@ -2468,35 +2460,38 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov r2, r12, d9 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvn r3, #0 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movmi r3, r0 -; CHECK-NEXT: movpl r1, r5 -; CHECK-NEXT: moveq r3, r0 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mvn r5, #0 +; CHECK-NEXT: subs r3, r0, r5 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: movwgt r6, #1 +; CHECK-NEXT: sbcs r3, r1, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movne r3, r1 +; CHECK-NEXT: moveq r0, r5 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: rscs r1, r3, #0 +; CHECK-NEXT: movwlt r6, #1 ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: movne r6, r3 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r6, r3 +; CHECK-NEXT: movne r6, r0 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: mov r1, r12 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: subs r2, r0, r5 ; CHECK-NEXT: vmov.32 d0[0], r6 -; CHECK-NEXT: movmi r4, r0 -; CHECK-NEXT: movpl r1, r5 -; CHECK-NEXT: moveq r4, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movwgt r5, #1 -; CHECK-NEXT: cmp r5, 
#0 -; CHECK-NEXT: movne r5, r4 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r5, r4 -; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: sbcs r2, r1, #0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: movne r2, r1 +; CHECK-NEXT: movne r5, r0 +; CHECK-NEXT: rsbs r0, r5, #0 +; CHECK-NEXT: rscs r0, r2, #0 +; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: movne r4, r5 +; CHECK-NEXT: vmov.32 d0[1], r4 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, pc} entry: @@ -2510,88 +2505,72 @@ entry: define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: mov r8, #-2147483648 -; CHECK-NEXT: mvn r7, #-2147483648 +; CHECK-NEXT: mvn r9, #0 +; CHECK-NEXT: mov r10, #0 ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: vmov r5, s16 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: cmn r0, #-2147483647 -; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: mov r9, #0 -; CHECK-NEXT: movlo r0, r4 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movpl r4, r7 -; CHECK-NEXT: movpl r1, r9 -; CHECK-NEXT: moveq r4, r0 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: movgt r0, r4 -; CHECK-NEXT: cmp r4, #-2147483648 -; CHECK-NEXT: movls r4, r8 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: movne r4, r0 +; CHECK-NEXT: mvn r7, #-2147483648 +; CHECK-NEXT: subs r0, r0, r7 +; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r0, r1 +; CHECK-NEXT: moveq r4, r7 +; CHECK-NEXT: rsbs r1, r4, #-2147483648 +; CHECK-NEXT: sbcs r0, r9, r0 ; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: movge r4, r8 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: cmn r0, #-2147483647 -; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: mov r2, #-2147483648 -; CHECK-NEXT: movlo r0, r5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movpl r5, r7 -; CHECK-NEXT: movpl r1, r9 -; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: subs r0, r0, r7 +; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: movgt r2, r5 -; CHECK-NEXT: cmp r5, #-2147483648 -; CHECK-NEXT: movls r5, r8 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: movne r5, r2 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: movne r2, r1 +; CHECK-NEXT: moveq r5, r7 +; CHECK-NEXT: rsbs r1, r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r9, r2 +; CHECK-NEXT: movge r5, r8 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmn r0, #-2147483647 -; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: mov r2, #-2147483648 -; CHECK-NEXT: movlo r0, r6 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movpl r6, r7 -; CHECK-NEXT: movpl r1, r9 -; CHECK-NEXT: moveq r6, r0 +; CHECK-NEXT: subs r0, r0, r7 +; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r0, r1 +; CHECK-NEXT: moveq r6, r7 +; CHECK-NEXT: rsbs r1, r6, #-2147483648 +; CHECK-NEXT: sbcs r0, r9, r0 ; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: movgt r2, r6 -; CHECK-NEXT: cmp r6, #-2147483648 -; CHECK-NEXT: movls r6, r8 -; CHECK-NEXT: cmn 
r1, #1 -; CHECK-NEXT: movne r6, r2 +; CHECK-NEXT: movge r6, r8 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmn r0, #-2147483647 -; CHECK-NEXT: mvn r2, #-2147483648 -; CHECK-NEXT: movlo r2, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movmi r7, r0 -; CHECK-NEXT: movmi r9, r1 -; CHECK-NEXT: moveq r7, r2 -; CHECK-NEXT: cmn r9, #1 -; CHECK-NEXT: mov r0, #-2147483648 +; CHECK-NEXT: subs r2, r0, r7 ; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: movgt r0, r7 -; CHECK-NEXT: cmp r7, #-2147483648 +; CHECK-NEXT: sbcs r2, r1, #0 +; CHECK-NEXT: movwlt r10, #1 +; CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: movne r10, r1 +; CHECK-NEXT: movne r7, r0 +; CHECK-NEXT: rsbs r0, r7, #-2147483648 ; CHECK-NEXT: vmov.32 d0[0], r5 -; CHECK-NEXT: movls r7, r8 -; CHECK-NEXT: cmn r9, #1 +; CHECK-NEXT: sbcs r0, r9, r10 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: movne r7, r0 +; CHECK-NEXT: movge r7, r8 ; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <4 x float> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -2649,75 +2628,81 @@ entry: define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: mvn r9, #0 ; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: vmov r5, s16 -; CHECK-NEXT: vmov r8, s18 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvn r2, #0 -; CHECK-NEXT: movmi r2, r0 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: moveq r2, r0 -; CHECK-NEXT: movpl r1, r7 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: mvn r7, #0 +; CHECK-NEXT: subs r3, r0, r7 ; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwgt r4, #1 +; CHECK-NEXT: sbcs r3, r1, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: vmov r9, s18 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movne r3, r1 +; CHECK-NEXT: moveq r0, r7 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: vmov r8, s17 +; CHECK-NEXT: rscs r1, r3, #0 +; CHECK-NEXT: movwlt r4, #1 ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movne r4, r2 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: moveq r4, r2 +; CHECK-NEXT: movne r4, r0 +; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvn r2, #0 -; CHECK-NEXT: movmi r2, r0 -; CHECK-NEXT: movpl r1, r7 -; CHECK-NEXT: moveq r2, r0 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: subs r2, r0, r7 ; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: movwgt r5, #1 +; CHECK-NEXT: sbcs r2, r1, #0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: movne r2, r1 +; CHECK-NEXT: moveq r0, r7 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: rscs r1, r2, #0 +; CHECK-NEXT: movwlt r5, #1 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movne r5, r2 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r5, r2 +; CHECK-NEXT: movne r5, r0 +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvn r2, #0 -; CHECK-NEXT: movmi r2, r0 -; CHECK-NEXT: movpl r1, r7 -; CHECK-NEXT: moveq r2, r0 -; CHECK-NEXT: vmov r0, s17 -; 
CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: subs r2, r0, r7 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movwgt r6, #1 +; CHECK-NEXT: sbcs r2, r1, #0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: movne r2, r1 +; CHECK-NEXT: moveq r0, r7 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: rscs r1, r2, #0 +; CHECK-NEXT: movwlt r6, #1 ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: movne r6, r2 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r6, r2 +; CHECK-NEXT: movne r6, r0 +; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: subs r2, r0, r7 ; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: movmi r9, r0 -; CHECK-NEXT: movpl r1, r7 -; CHECK-NEXT: moveq r9, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movwgt r7, #1 -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: sbcs r2, r1, #0 +; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: vmov.32 d0[0], r5 -; CHECK-NEXT: movne r7, r9 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: movne r2, r1 +; CHECK-NEXT: movne r7, r0 +; CHECK-NEXT: rsbs r0, r7, #0 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: moveq r7, r9 -; CHECK-NEXT: vmov.32 d0[1], r7 +; CHECK-NEXT: rscs r0, r2, #0 +; CHECK-NEXT: movwlt r10, #1 +; CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: movne r10, r7 +; CHECK-NEXT: vmov.32 d0[1], r10 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <4 x float> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -2729,8 +2714,8 @@ entry: define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-LABEL: stest_f16i32_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEON-NEXT: .vsave {d8, d9, d10} ; CHECK-NEON-NEXT: vpush {d8, d9, d10} ; CHECK-NEON-NEXT: vmov r0, s3 @@ -2741,170 +2726,138 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r4, r0 ; CHECK-NEON-NEXT: vmov r0, s20 -; CHECK-NEON-NEXT: cmn r4, #-2147483647 -; CHECK-NEON-NEXT: mvn r2, #-2147483648 -; CHECK-NEON-NEXT: movlo r2, r4 ; CHECK-NEON-NEXT: mvn r7, #-2147483648 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r9, #0 -; CHECK-NEON-NEXT: movpl r4, r7 -; CHECK-NEON-NEXT: movpl r1, r9 -; CHECK-NEON-NEXT: moveq r4, r2 -; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: mov r2, #-2147483648 +; CHECK-NEON-NEXT: subs r2, r4, r7 +; CHECK-NEON-NEXT: sbcs r2, r1, #0 ; CHECK-NEON-NEXT: mov r8, #-2147483648 -; CHECK-NEON-NEXT: movgt r2, r4 -; CHECK-NEON-NEXT: cmp r4, #-2147483648 -; CHECK-NEON-NEXT: movls r4, r8 -; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: movne r4, r2 +; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: mvn r9, #0 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: movne r2, r1 +; CHECK-NEON-NEXT: moveq r4, r7 +; CHECK-NEON-NEXT: rsbs r1, r4, #-2147483648 +; CHECK-NEON-NEXT: mov r10, #0 +; CHECK-NEON-NEXT: sbcs r1, r9, r2 +; CHECK-NEON-NEXT: movge r4, r8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: cmn r0, #-2147483647 -; CHECK-NEON-NEXT: mvn r0, #-2147483648 -; CHECK-NEON-NEXT: mov r2, #-2147483648 -; CHECK-NEON-NEXT: movlo r0, r5 -; 
CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movpl r5, r7 -; CHECK-NEON-NEXT: movpl r1, r9 -; CHECK-NEON-NEXT: moveq r5, r0 +; CHECK-NEON-NEXT: subs r0, r0, r7 +; CHECK-NEON-NEXT: sbcs r0, r1, #0 +; CHECK-NEON-NEXT: mov r2, #0 ; CHECK-NEON-NEXT: vmov r0, s18 -; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: movgt r2, r5 -; CHECK-NEON-NEXT: cmp r5, #-2147483648 -; CHECK-NEON-NEXT: movls r5, r8 -; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: movne r5, r2 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: movne r2, r1 +; CHECK-NEON-NEXT: moveq r5, r7 +; CHECK-NEON-NEXT: rsbs r1, r5, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r9, r2 +; CHECK-NEON-NEXT: movge r5, r8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r6, r0 -; CHECK-NEON-NEXT: cmn r0, #-2147483647 -; CHECK-NEON-NEXT: mvn r0, #-2147483648 -; CHECK-NEON-NEXT: mov r2, #-2147483648 -; CHECK-NEON-NEXT: movlo r0, r6 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movpl r6, r7 -; CHECK-NEON-NEXT: movpl r1, r9 -; CHECK-NEON-NEXT: moveq r6, r0 +; CHECK-NEON-NEXT: subs r0, r0, r7 +; CHECK-NEON-NEXT: sbcs r0, r1, #0 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: movwlt r0, #1 +; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: movne r0, r1 +; CHECK-NEON-NEXT: moveq r6, r7 +; CHECK-NEON-NEXT: rsbs r1, r6, #-2147483648 +; CHECK-NEON-NEXT: sbcs r0, r9, r0 ; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: movgt r2, r6 -; CHECK-NEON-NEXT: cmp r6, #-2147483648 -; CHECK-NEON-NEXT: movls r6, r8 -; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: movne r6, r2 +; CHECK-NEON-NEXT: movge r6, r8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: cmn r0, #-2147483647 -; CHECK-NEON-NEXT: mvn r2, #-2147483648 -; CHECK-NEON-NEXT: movlo r2, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movmi r7, r0 -; CHECK-NEON-NEXT: movmi r9, r1 -; CHECK-NEON-NEXT: moveq r7, r2 -; CHECK-NEON-NEXT: cmn r9, #1 -; CHECK-NEON-NEXT: mov r0, #-2147483648 +; CHECK-NEON-NEXT: subs r2, r0, r7 ; CHECK-NEON-NEXT: vmov.32 d1[0], r6 -; CHECK-NEON-NEXT: movgt r0, r7 -; CHECK-NEON-NEXT: cmp r7, #-2147483648 +; CHECK-NEON-NEXT: sbcs r2, r1, #0 +; CHECK-NEON-NEXT: movwlt r10, #1 +; CHECK-NEON-NEXT: cmp r10, #0 +; CHECK-NEON-NEXT: movne r10, r1 +; CHECK-NEON-NEXT: movne r7, r0 +; CHECK-NEON-NEXT: rsbs r0, r7, #-2147483648 ; CHECK-NEON-NEXT: vmov.32 d0[0], r5 -; CHECK-NEON-NEXT: movls r7, r8 -; CHECK-NEON-NEXT: cmn r9, #1 +; CHECK-NEON-NEXT: sbcs r0, r9, r10 ; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: movne r7, r0 +; CHECK-NEON-NEXT: movge r7, r8 ; CHECK-NEON-NEXT: vmov.32 d0[1], r7 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; ; CHECK-FP16-LABEL: stest_f16i32_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-FP16-NEXT: .vsave {d8, d9} ; CHECK-FP16-NEXT: vpush {d8, d9} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] ; CHECK-FP16-NEXT: vorr d8, d0, d0 +; CHECK-FP16-NEXT: vmov.u16 r5, d0[2] +; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: mov r4, r0 -; CHECK-FP16-NEXT: vmov.u16 r0, d8[2] -; CHECK-FP16-NEXT: vmov.u16 r2, 
d8[0] -; CHECK-FP16-NEXT: cmn r4, #-2147483647 ; CHECK-FP16-NEXT: mvn r7, #-2147483648 -; CHECK-FP16-NEXT: mov r9, #0 +; CHECK-FP16-NEXT: subs r0, r0, r7 +; CHECK-FP16-NEXT: vmov s0, r6 +; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: mov r8, #-2147483648 -; CHECK-FP16-NEXT: vmov s18, r0 -; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: movlo r0, r4 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movpl r4, r7 -; CHECK-FP16-NEXT: movpl r1, r9 -; CHECK-FP16-NEXT: moveq r4, r0 -; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: vmov s0, r2 -; CHECK-FP16-NEXT: movgt r0, r4 -; CHECK-FP16-NEXT: cmp r4, #-2147483648 -; CHECK-FP16-NEXT: movls r4, r8 -; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: movne r4, r0 +; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: mvn r9, #0 +; CHECK-FP16-NEXT: movwlt r0, #1 +; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: movne r0, r1 +; CHECK-FP16-NEXT: moveq r4, r7 +; CHECK-FP16-NEXT: rsbs r1, r4, #-2147483648 +; CHECK-FP16-NEXT: mov r10, #0 +; CHECK-FP16-NEXT: sbcs r0, r9, r0 +; CHECK-FP16-NEXT: vmov s18, r5 +; CHECK-FP16-NEXT: movge r4, r8 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: vmov.f32 s0, s18 ; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: cmn r0, #-2147483647 -; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: movlo r0, r5 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movpl r5, r7 -; CHECK-FP16-NEXT: movpl r1, r9 -; CHECK-FP16-NEXT: moveq r5, r0 -; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: movgt r0, r5 -; CHECK-FP16-NEXT: cmp r5, #-2147483648 -; CHECK-FP16-NEXT: movls r5, r8 -; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: movne r5, r0 +; CHECK-FP16-NEXT: subs r0, r0, r7 +; CHECK-FP16-NEXT: sbcs r0, r1, #0 +; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: movwlt r0, #1 +; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: movne r0, r1 +; CHECK-FP16-NEXT: moveq r5, r7 +; CHECK-FP16-NEXT: rsbs r1, r5, #-2147483648 +; CHECK-FP16-NEXT: sbcs r0, r9, r0 +; CHECK-FP16-NEXT: movge r5, r8 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] ; CHECK-FP16-NEXT: mov r6, r0 -; CHECK-FP16-NEXT: cmn r0, #-2147483647 -; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: movlo r0, r6 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movpl r6, r7 -; CHECK-FP16-NEXT: movpl r1, r9 -; CHECK-FP16-NEXT: moveq r6, r0 -; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: movgt r0, r6 -; CHECK-FP16-NEXT: cmp r6, #-2147483648 -; CHECK-FP16-NEXT: movls r6, r8 -; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: movne r6, r0 -; CHECK-FP16-NEXT: vmov s0, r2 +; CHECK-FP16-NEXT: subs r0, r0, r7 +; CHECK-FP16-NEXT: sbcs r0, r1, #0 +; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: movwlt r0, #1 +; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: movne r0, r1 +; CHECK-FP16-NEXT: vmov.u16 r1, d8[1] +; CHECK-FP16-NEXT: moveq r6, r7 +; CHECK-FP16-NEXT: vmov s0, r1 +; CHECK-FP16-NEXT: rsbs r1, r6, #-2147483648 +; CHECK-FP16-NEXT: sbcs r0, r9, r0 +; CHECK-FP16-NEXT: movge r6, r8 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: cmn r0, #-2147483647 -; CHECK-FP16-NEXT: mvn r2, #-2147483648 -; CHECK-FP16-NEXT: movlo r2, r0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movmi r7, r0 -; CHECK-FP16-NEXT: movmi r9, r1 -; CHECK-FP16-NEXT: moveq r7, r2 -; CHECK-FP16-NEXT: cmn r9, #1 -; CHECK-FP16-NEXT: mov r0, #-2147483648 +; CHECK-FP16-NEXT: subs r2, r0, r7 ; CHECK-FP16-NEXT: vmov.32 d1[0], r6 -; 
CHECK-FP16-NEXT: movgt r0, r7 -; CHECK-FP16-NEXT: cmp r7, #-2147483648 +; CHECK-FP16-NEXT: sbcs r2, r1, #0 +; CHECK-FP16-NEXT: movwlt r10, #1 +; CHECK-FP16-NEXT: cmp r10, #0 +; CHECK-FP16-NEXT: movne r10, r1 +; CHECK-FP16-NEXT: movne r7, r0 +; CHECK-FP16-NEXT: rsbs r0, r7, #-2147483648 ; CHECK-FP16-NEXT: vmov.32 d0[0], r5 -; CHECK-FP16-NEXT: movls r7, r8 -; CHECK-FP16-NEXT: cmn r9, #1 +; CHECK-FP16-NEXT: sbcs r0, r9, r10 ; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: movne r7, r0 +; CHECK-FP16-NEXT: movge r7, r8 ; CHECK-FP16-NEXT: vmov.32 d0[1], r7 ; CHECK-FP16-NEXT: vpop {d8, d9} -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <4 x half> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -3011,81 +2964,87 @@ entry: define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-LABEL: ustest_f16i32_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEON-NEXT: .vsave {d8, d9, d10} ; CHECK-NEON-NEXT: vpush {d8, d9, d10} ; CHECK-NEON-NEXT: vmov r0, s3 -; CHECK-NEON-NEXT: vmov.f32 s18, s2 -; CHECK-NEON-NEXT: vmov.f32 s16, s1 +; CHECK-NEON-NEXT: vmov.f32 s16, s2 +; CHECK-NEON-NEXT: vmov.f32 s18, s1 ; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: vmov r2, s20 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mvn r3, #0 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: movmi r3, r0 -; CHECK-NEON-NEXT: movpl r1, r6 -; CHECK-NEON-NEXT: moveq r3, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: mvn r7, #0 +; CHECK-NEON-NEXT: subs r3, r0, r7 +; CHECK-NEON-NEXT: mov r4, #0 +; CHECK-NEON-NEXT: sbcs r3, r1, #0 +; CHECK-NEON-NEXT: mov r10, #0 +; CHECK-NEON-NEXT: mov r3, #0 ; CHECK-NEON-NEXT: vmov r8, s18 -; CHECK-NEON-NEXT: movwgt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: movne r7, r3 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: moveq r7, r3 +; CHECK-NEON-NEXT: movwlt r3, #1 +; CHECK-NEON-NEXT: cmp r3, #0 +; CHECK-NEON-NEXT: movne r3, r1 +; CHECK-NEON-NEXT: moveq r0, r7 +; CHECK-NEON-NEXT: rsbs r1, r0, #0 +; CHECK-NEON-NEXT: vmov r9, s16 +; CHECK-NEON-NEXT: rscs r1, r3, #0 +; CHECK-NEON-NEXT: movwlt r4, #1 +; CHECK-NEON-NEXT: cmp r4, #0 +; CHECK-NEON-NEXT: movne r4, r0 ; CHECK-NEON-NEXT: mov r0, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mvn r2, #0 -; CHECK-NEON-NEXT: movmi r2, r0 -; CHECK-NEON-NEXT: movpl r1, r6 -; CHECK-NEON-NEXT: moveq r2, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r4, #0 -; CHECK-NEON-NEXT: mov r0, r8 -; CHECK-NEON-NEXT: movwgt r4, #1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: movne r4, r2 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r4, r2 -; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mvn r2, #0 -; CHECK-NEON-NEXT: movmi r2, r0 -; CHECK-NEON-NEXT: movpl r1, r6 -; CHECK-NEON-NEXT: moveq r2, r0 -; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: subs r2, r0, r7 ; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: movwgt r5, #1 +; 
CHECK-NEON-NEXT: sbcs r2, r1, #0 +; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: movne r2, r1 +; CHECK-NEON-NEXT: moveq r0, r7 +; CHECK-NEON-NEXT: rsbs r1, r0, #0 +; CHECK-NEON-NEXT: rscs r1, r2, #0 +; CHECK-NEON-NEXT: movwlt r5, #1 ; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: movne r5, r2 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r5, r2 +; CHECK-NEON-NEXT: movne r5, r0 +; CHECK-NEON-NEXT: mov r0, r9 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: movmi r9, r0 -; CHECK-NEON-NEXT: movpl r1, r6 -; CHECK-NEON-NEXT: moveq r9, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movwgt r6, #1 +; CHECK-NEON-NEXT: subs r2, r0, r7 +; CHECK-NEON-NEXT: mov r6, #0 +; CHECK-NEON-NEXT: sbcs r2, r1, #0 +; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: movne r2, r1 +; CHECK-NEON-NEXT: moveq r0, r7 +; CHECK-NEON-NEXT: rsbs r1, r0, #0 +; CHECK-NEON-NEXT: rscs r1, r2, #0 +; CHECK-NEON-NEXT: movwlt r6, #1 ; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: vmov.32 d0[0], r4 -; CHECK-NEON-NEXT: movne r6, r9 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r7 -; CHECK-NEON-NEXT: moveq r6, r9 -; CHECK-NEON-NEXT: vmov.32 d0[1], r6 +; CHECK-NEON-NEXT: movne r6, r0 +; CHECK-NEON-NEXT: mov r0, r8 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: bl __aeabi_f2lz +; CHECK-NEON-NEXT: subs r2, r0, r7 +; CHECK-NEON-NEXT: vmov.32 d1[0], r6 +; CHECK-NEON-NEXT: sbcs r2, r1, #0 +; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: vmov.32 d0[0], r5 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: movne r2, r1 +; CHECK-NEON-NEXT: movne r7, r0 +; CHECK-NEON-NEXT: rsbs r0, r7, #0 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: rscs r0, r2, #0 +; CHECK-NEON-NEXT: movwlt r10, #1 +; CHECK-NEON-NEXT: cmp r10, #0 +; CHECK-NEON-NEXT: movne r10, r7 +; CHECK-NEON-NEXT: vmov.32 d0[1], r10 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; ; CHECK-FP16-LABEL: ustest_f16i32_mm: ; CHECK-FP16: @ %bb.0: @ %entry @@ -3095,71 +3054,76 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: vpush {d8, d9} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] ; CHECK-FP16-NEXT: vorr d8, d0, d0 +; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r7, d0[2] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: vmov.u16 r7, d8[0] -; CHECK-FP16-NEXT: mov r5, #0 -; CHECK-FP16-NEXT: vmov.u16 r3, d8[2] -; CHECK-FP16-NEXT: movpl r1, r5 +; CHECK-FP16-NEXT: mvn r5, #0 +; CHECK-FP16-NEXT: vmov s0, r6 ; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: mvn r8, #0 +; CHECK-FP16-NEXT: mov r8, #0 +; CHECK-FP16-NEXT: vmov s18, r7 ; CHECK-FP16-NEXT: vmov s16, r2 -; CHECK-FP16-NEXT: mvn r2, #0 -; CHECK-FP16-NEXT: movmi r2, r0 -; CHECK-FP16-NEXT: vmov s0, r7 -; CHECK-FP16-NEXT: moveq r2, r0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movwgt r6, #1 +; CHECK-FP16-NEXT: subs r2, r0, r5 +; CHECK-FP16-NEXT: sbcs r2, r1, #0 +; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: movne r2, r1 +; CHECK-FP16-NEXT: moveq r0, r5 +; CHECK-FP16-NEXT: rsbs r1, r0, #0 +; 
CHECK-FP16-NEXT: rscs r1, r2, #0 +; CHECK-FP16-NEXT: movwlt r6, #1 ; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: movne r6, r2 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: vmov s18, r3 -; CHECK-FP16-NEXT: moveq r6, r2 +; CHECK-FP16-NEXT: movne r6, r0 ; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: subs r2, r0, r5 ; CHECK-FP16-NEXT: vmov.f32 s0, s18 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mvn r2, #0 -; CHECK-FP16-NEXT: movpl r1, r5 -; CHECK-FP16-NEXT: movmi r2, r0 +; CHECK-FP16-NEXT: sbcs r2, r1, #0 ; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: moveq r2, r0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movwgt r7, #1 +; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: movne r2, r1 +; CHECK-FP16-NEXT: moveq r0, r5 +; CHECK-FP16-NEXT: rsbs r1, r0, #0 +; CHECK-FP16-NEXT: rscs r1, r2, #0 +; CHECK-FP16-NEXT: movwlt r7, #1 ; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: movne r7, r2 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r7, r2 +; CHECK-FP16-NEXT: movne r7, r0 ; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: subs r2, r0, r5 ; CHECK-FP16-NEXT: vmov.f32 s0, s16 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mvn r2, #0 -; CHECK-FP16-NEXT: movpl r1, r5 -; CHECK-FP16-NEXT: movmi r2, r0 +; CHECK-FP16-NEXT: sbcs r2, r1, #0 ; CHECK-FP16-NEXT: mov r4, #0 -; CHECK-FP16-NEXT: moveq r2, r0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movwgt r4, #1 +; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: movne r2, r1 +; CHECK-FP16-NEXT: moveq r0, r5 +; CHECK-FP16-NEXT: rsbs r1, r0, #0 +; CHECK-FP16-NEXT: rscs r1, r2, #0 +; CHECK-FP16-NEXT: movwlt r4, #1 ; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: movne r4, r2 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r4, r2 +; CHECK-FP16-NEXT: movne r4, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: subs r2, r0, r5 ; CHECK-FP16-NEXT: vmov.32 d1[0], r4 -; CHECK-FP16-NEXT: movmi r8, r0 -; CHECK-FP16-NEXT: movpl r1, r5 -; CHECK-FP16-NEXT: moveq r8, r0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movwgt r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 +; CHECK-FP16-NEXT: sbcs r2, r1, #0 +; CHECK-FP16-NEXT: mov r2, #0 ; CHECK-FP16-NEXT: vmov.32 d0[0], r7 -; CHECK-FP16-NEXT: movne r5, r8 -; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: movne r2, r1 +; CHECK-FP16-NEXT: movne r5, r0 +; CHECK-FP16-NEXT: rsbs r0, r5, #0 ; CHECK-FP16-NEXT: vmov.32 d1[1], r6 -; CHECK-FP16-NEXT: moveq r5, r8 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 +; CHECK-FP16-NEXT: rscs r0, r2, #0 +; CHECK-FP16-NEXT: movwlt r8, #1 +; CHECK-FP16-NEXT: cmp r8, #0 +; CHECK-FP16-NEXT: movne r8, r5 +; CHECK-FP16-NEXT: vmov.32 d0[1], r8 ; CHECK-FP16-NEXT: vpop {d8, d9} ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: @@ -3659,113 +3623,68 @@ entry: define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: 
bl __fixdfti -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: mov r10, #0 -; CHECK-NEXT: andne r0, r2, r0, asr #31 -; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: movmi r10, r3 -; CHECK-NEXT: and r1, r0, r10 -; CHECK-NEXT: cmn r11, #-2147483647 -; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: movlo r0, r11 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mvn r8, #-2147483648 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mvn r8, #0 +; CHECK-NEXT: subs r0, r0, r8 +; CHECK-NEXT: mvn r6, #-2147483648 +; CHECK-NEXT: sbcs r0, r1, r6 +; CHECK-NEXT: mov r10, r1 +; CHECK-NEXT: sbcs r0, r2, #0 ; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: movmi r8, r11 -; CHECK-NEXT: orrs r2, r2, r3 -; CHECK-NEXT: moveq r8, r0 -; CHECK-NEXT: cmn r10, #1 -; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: mov r9, #-2147483648 -; CHECK-NEXT: movgt r0, r8 -; CHECK-NEXT: cmp r8, #-2147483648 -; CHECK-NEXT: movhi r9, r8 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: movne r9, r0 +; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r3, r0 +; CHECK-NEXT: movne r0, r2 +; CHECK-NEXT: moveq r10, r6 +; CHECK-NEXT: moveq r5, r8 +; CHECK-NEXT: rsbs r1, r5, #0 +; CHECK-NEXT: rscs r1, r10, #-2147483648 +; CHECK-NEXT: sbcs r0, r8, r0 +; CHECK-NEXT: sbcs r0, r8, r3 +; CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: moveq r5, r7 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r5, #0 -; CHECK-NEXT: movlo r5, r0 -; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: moveq r5, r0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movpl r0, r4 -; CHECK-NEXT: orrs r12, r2, r3 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: movlo r5, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movmi r7, r1 -; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: moveq r7, r5 -; CHECK-NEXT: cmp r7, #-2147483648 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: movhi r1, r0 -; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: moveq r1, r0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: movmi r6, r5 -; CHECK-NEXT: cmn r11, #-2147483647 -; CHECK-NEXT: movlo r4, r5 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: movne r4, r6 -; CHECK-NEXT: cmp r8, #-2147483648 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movhi r6, r4 -; CHECK-NEXT: moveq r6, r4 -; CHECK-NEXT: cmn r10, #1 -; CHECK-NEXT: movle r4, r12 -; CHECK-NEXT: cmn r5, #1 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: vmov.32 d1[0], r4 -; CHECK-NEXT: movmi r6, r3 -; CHECK-NEXT: cmn r6, #1 -; CHECK-NEXT: movle r0, r12 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: andne r3, r2, r3, asr #31 -; CHECK-NEXT: and r2, r3, r6 -; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: moveq r0, r1 -; CHECK-NEXT: cmn r6, #1 +; CHECK-NEXT: subs r4, r0, r8 +; CHECK-NEXT: vmov.32 d1[0], r5 +; CHECK-NEXT: sbcs r4, r1, r6 +; CHECK-NEXT: sbcs r4, r2, #0 +; CHECK-NEXT: sbcs r4, r3, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: moveq r3, r4 +; CHECK-NEXT: movne r6, r1 +; CHECK-NEXT: movne 
r4, r2 +; CHECK-NEXT: moveq r0, r8 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: rscs r1, r6, #-2147483648 +; CHECK-NEXT: sbcs r1, r8, r4 +; CHECK-NEXT: sbcs r1, r8, r3 +; CHECK-NEXT: movwlt r9, #1 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: moveq r0, r9 ; CHECK-NEXT: mov r1, #-2147483648 +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: movgt r1, r7 -; CHECK-NEXT: cmp r7, #-2147483648 -; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: vmov.32 d1[1], r9 -; CHECK-NEXT: movls r7, r0 -; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: movne r7, r1 -; CHECK-NEXT: vmov.32 d0[1], r7 -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: moveq r10, r1 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: vmov.32 d1[1], r10 +; CHECK-NEXT: moveq r6, r1 +; CHECK-NEXT: vmov.32 d0[1], r6 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>) @@ -3784,41 +3703,30 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: eor r1, r2, #1 -; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr r1, r1, r3 -; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r7, r6 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: subs r0, r2, #1 ; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: moveq r7, r1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movne r6, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r6, r1 +; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: movwlo r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: moveq r5, r7 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: eor r4, r2, #1 ; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: vmov.32 d1[0], r5 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr r4, r4, r3 -; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: moveq r0, r4 -; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movwlo r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r0, r6 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: movne r7, r4 ; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: vmov.32 d1[1], r7 -; CHECK-NEXT: moveq r5, r4 -; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: movne r6, r1 +; CHECK-NEXT: vmov.32 d0[1], r6 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} entry: @@ -3831,65 +3739,49 @@ entry: define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: eor r0, r2, #1 -; CHECK-NEXT: orr r10, r0, r3 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: subs r0, r2, #1 +; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: sbcs r0, r3, #0 ; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: moveq r4, r7 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: moveq r4, r10 -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: mov r9, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movpl r5, r9 +; CHECK-NEXT: moveq r6, r7 +; CHECK-NEXT: moveq r5, r7 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movwmi r4, #0 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwmi r6, #0 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: eor r6, r2, #1 ; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: vmov.32 d0[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr r6, r6, r3 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r0, r2 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r0, r6 +; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: moveq r0, r4 +; CHECK-NEXT: moveq r3, r4 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movwmi r0, #0 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: movne r4, r1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: vmov.32 d1[0], r0 +; CHECK-NEXT: movwmi r4, #0 ; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: movne r7, r8 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: moveq r7, r10 ; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: vmov.32 d1[1], r4 ; CHECK-NEXT: movwmi r7, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movmi r9, r3 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: movwmi r0, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: movne r2, r1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: moveq r2, r6 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: movwmi r2, #0 -; CHECK-NEXT: vmov.32 d0[1], r2 +; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551615, i128 18446744073709551615>) @@ -3901,113 +3793,68 @@ entry: define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mvn r8, #0 +; CHECK-NEXT: subs r0, r0, r8 +; CHECK-NEXT: mvn r6, #-2147483648 +; CHECK-NEXT: sbcs r0, r1, r6 ; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: andne r0, r2, r0, asr #31 -; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: movmi r10, r3 -; CHECK-NEXT: and r1, r0, r10 -; CHECK-NEXT: cmn r11, #-2147483647 -; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: mvn r8, #-2147483648 -; CHECK-NEXT: movlo r0, r11 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movmi r8, r11 -; CHECK-NEXT: orrs r2, r2, r3 -; CHECK-NEXT: moveq r8, r0 -; CHECK-NEXT: cmn r10, #1 -; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: mov r9, #-2147483648 -; CHECK-NEXT: movgt r0, r8 -; CHECK-NEXT: cmp r8, #-2147483648 -; CHECK-NEXT: movhi
r9, r8 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: movne r9, r0 +; CHECK-NEXT: sbcs r0, r2, #0 +; CHECK-NEXT: mov r10, r1 +; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r3, r0 +; CHECK-NEXT: movne r0, r2 +; CHECK-NEXT: moveq r10, r6 +; CHECK-NEXT: moveq r5, r8 +; CHECK-NEXT: rsbs r1, r5, #0 +; CHECK-NEXT: rscs r1, r10, #-2147483648 +; CHECK-NEXT: sbcs r0, r8, r0 +; CHECK-NEXT: sbcs r0, r8, r3 +; CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: moveq r5, r7 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r5, #0 -; CHECK-NEXT: movlo r5, r0 -; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: moveq r5, r0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movpl r0, r4 -; CHECK-NEXT: orrs r12, r2, r3 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: movlo r5, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movmi r7, r1 -; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: moveq r7, r5 -; CHECK-NEXT: cmp r7, #-2147483648 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: movhi r1, r0 -; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: moveq r1, r0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: movmi r6, r5 -; CHECK-NEXT: cmn r11, #-2147483647 -; CHECK-NEXT: movlo r4, r5 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: movne r4, r6 -; CHECK-NEXT: cmp r8, #-2147483648 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movhi r6, r4 -; CHECK-NEXT: moveq r6, r4 -; CHECK-NEXT: cmn r10, #1 -; CHECK-NEXT: movle r4, r12 -; CHECK-NEXT: cmn r5, #1 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: vmov.32 d1[0], r4 -; CHECK-NEXT: movmi r6, r3 -; CHECK-NEXT: cmn r6, #1 -; CHECK-NEXT: movle r0, r12 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: andne r3, r2, r3, asr #31 -; CHECK-NEXT: and r2, r3, r6 -; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: moveq r0, r1 -; CHECK-NEXT: cmn r6, #1 +; CHECK-NEXT: subs r4, r0, r8 +; CHECK-NEXT: vmov.32 d1[0], r5 +; CHECK-NEXT: sbcs r4, r1, r6 +; CHECK-NEXT: sbcs r4, r2, #0 +; CHECK-NEXT: sbcs r4, r3, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: moveq r3, r4 +; CHECK-NEXT: movne r6, r1 +; CHECK-NEXT: movne r4, r2 +; CHECK-NEXT: moveq r0, r8 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: rscs r1, r6, #-2147483648 +; CHECK-NEXT: sbcs r1, r8, r4 +; CHECK-NEXT: sbcs r1, r8, r3 +; CHECK-NEXT: movwlt r9, #1 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: moveq r0, r9 ; CHECK-NEXT: mov r1, #-2147483648 +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: movgt r1, r7 -; CHECK-NEXT: cmp r7, #-2147483648 -; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: vmov.32 d1[1], r9 -; CHECK-NEXT: movls r7, r0 -; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: movne r7, r1 -; CHECK-NEXT: vmov.32 d0[1], r7 -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: moveq r10, r1 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: vmov.32 d1[1], r10 +; CHECK-NEXT: moveq r6, r1 +; CHECK-NEXT: vmov.32 d0[1], r6 ; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} 
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>) @@ -4027,40 +3874,29 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: eor r1, r2, #1 -; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: subs r0, r2, #1 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: movwlo r7, #1 +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: orr r1, r1, r3 -; CHECK-NEXT: moveq r7, r6 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: moveq r7, r1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: movne r6, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r6, r1 +; CHECK-NEXT: moveq r5, r7 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: eor r4, r2, #1 ; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: vmov.32 d1[0], r5 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr r4, r4, r3 -; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: moveq r0, r4 -; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movwlo r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r0, r6 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: movne r7, r4 ; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: vmov.32 d1[1], r7 -; CHECK-NEXT: moveq r5, r4 -; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: movne r6, r1 +; CHECK-NEXT: vmov.32 d0[1], r6 ; CHECK-NEXT: vpop {d8} ; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} entry: @@ -4073,65 +3909,49 @@ entry: define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vmov.f64 d8, d0 -; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: eor r0, r2, #1 -; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: orr r10, r0, r3 +; CHECK-NEXT: vmov.f32 s0, s17 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: subs r0, r2, #1 ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r4, r7 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: moveq r4, r10 -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: mov r9, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movpl r5, r9 +; CHECK-NEXT: moveq r6, r7 +; CHECK-NEXT: moveq r5, r7 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movwmi r4, #0 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwmi r6, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: eor r6, r2, #1 ; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: vmov.32 d0[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr r6, r6, r3 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r0, r2 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r0, r6 +; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: moveq r0, r4 +; CHECK-NEXT:
moveq r3, r4 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movwmi r0, #0 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: movne r4, r1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: vmov.32 d1[0], r0 +; CHECK-NEXT: movwmi r4, #0 ; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: movne r7, r8 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: moveq r7, r10 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movwmi r7, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movmi r9, r3 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: movwmi r0, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: movne r2, r1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: moveq r2, r6 -; CHECK-NEXT: cmp r9, #0 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: movwmi r2, #0 -; CHECK-NEXT: vmov.32 d0[1], r2 +; CHECK-NEXT: movwmi r7, #0 +; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551615, i128 18446744073709551615>) @@ -4149,223 +3969,133 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: sub sp, sp, #4 ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} -; CHECK-NEON-NEXT: .pad #16 -; CHECK-NEON-NEXT: sub sp, sp, #16 -; CHECK-NEON-NEXT: vmov r0, s1 -; CHECK-NEON-NEXT: vmov.f32 s16, s0 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: vmov s0, r0 -; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: mov r0, r3 -; CHECK-NEON-NEXT: mov r10, #0 -; CHECK-NEON-NEXT: andne r0, r2, r0, asr #31 -; CHECK-NEON-NEXT: mov r11, r1 -; CHECK-NEON-NEXT: movmi r10, r3 -; CHECK-NEON-NEXT: and r1, r0, r10 -; CHECK-NEON-NEXT: cmn r11, #-2147483647 -; CHECK-NEON-NEXT: mvn r0, #-2147483648 -; CHECK-NEON-NEXT: movlo r0, r11 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: mvn r8, #-2147483648 -; CHECK-NEON-NEXT: mov r9, #-2147483648 -; CHECK-NEON-NEXT: movmi r8, r11 -; CHECK-NEON-NEXT: orrs r2, r2, r3 -; CHECK-NEON-NEXT: moveq r8, r0 -; CHECK-NEON-NEXT: cmn r10, #1 -; CHECK-NEON-NEXT: mov r0, #-2147483648 -; CHECK-NEON-NEXT: mov r6, r3 -; CHECK-NEON-NEXT: movgt r0, r8 -; CHECK-NEON-NEXT: cmp r8, #-2147483648 -; CHECK-NEON-NEXT: movhi r9, r8 -; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: movne r9, r0 +; CHECK-NEON-NEXT: mov r8, r0 ; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEON-NEXT: mvn r7, #-2147483648 -; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: cmn r1, #-2147483647 -; CHECK-NEON-NEXT: mvn r5, #0 -; CHECK-NEON-NEXT: movlo r5, r0 -; CHECK-NEON-NEXT: mvn r4, #0 -; CHECK-NEON-NEXT: moveq r5, r0 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: movpl r0, r4 -; CHECK-NEON-NEXT: orrs r12, r2, r3 -; CHECK-NEON-NEXT: moveq r0, r5 -; CHECK-NEON-NEXT: cmn r1, #-2147483647 -; CHECK-NEON-NEXT: mvn r5, #-2147483648 -; CHECK-NEON-NEXT: movlo r5, r1 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: movmi r7, r1 -; CHECK-NEON-NEXT: cmp r12, #0 -; CHECK-NEON-NEXT: moveq r7, r5 -; CHECK-NEON-NEXT: cmp r7, #-2147483648 -; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-NEON-NEXT: movhi r1, r0 -; CHECK-NEON-NEXT: mov r12, #0 -; CHECK-NEON-NEXT: moveq r1, r0 -;
CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: mvn r6, #0 -; CHECK-NEON-NEXT: movmi r6, r5 -; CHECK-NEON-NEXT: cmn r11, #-2147483647 -; CHECK-NEON-NEXT: movlo r4, r5 -; CHECK-NEON-NEXT: moveq r4, r5 -; CHECK-NEON-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEON-NEXT: movne r4, r6 -; CHECK-NEON-NEXT: cmp r8, #-2147483648 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: movhi r6, r4 -; CHECK-NEON-NEXT: moveq r6, r4 -; CHECK-NEON-NEXT: cmn r10, #1 -; CHECK-NEON-NEXT: movle r4, r12 -; CHECK-NEON-NEXT: cmn r5, #1 -; CHECK-NEON-NEXT: moveq r4, r6 -; CHECK-NEON-NEXT: cmp r3, #0 +; CHECK-NEON-NEXT: mov r5, r0 +; CHECK-NEON-NEXT: mvn r9, #0 +; CHECK-NEON-NEXT: subs r0, r0, r9 +; CHECK-NEON-NEXT: mvn r7, #-2147483648 +; CHECK-NEON-NEXT: sbcs r0, r1, r7 +; CHECK-NEON-NEXT: mov r11, r1 +; CHECK-NEON-NEXT: sbcs r0, r2, #0 +; CHECK-NEON-NEXT: vmov s0, r8 +; CHECK-NEON-NEXT: sbcs r0, r3, #0 ; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: vmov.32 d1[0], r4 -; CHECK-NEON-NEXT: movmi r6, r3 -; CHECK-NEON-NEXT: cmn r6, #1 -; CHECK-NEON-NEXT: movle r0, r12 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: andne r3, r2, r3, asr #31 -; CHECK-NEON-NEXT: and r2, r3, r6 -; CHECK-NEON-NEXT: cmn r2, #1 -; CHECK-NEON-NEXT: moveq r0, r1 -; CHECK-NEON-NEXT: cmn r6, #1 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: mov r10, #0 +; CHECK-NEON-NEXT: movwlt r0, #1 +; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: moveq r3, r0 +; CHECK-NEON-NEXT: movne r0, r2 +; CHECK-NEON-NEXT: moveq r11, r7 +; CHECK-NEON-NEXT: moveq r5, r9 +; CHECK-NEON-NEXT: rsbs r1, r5, #0 +; CHECK-NEON-NEXT: rscs r1, r11, #-2147483648 +; CHECK-NEON-NEXT: sbcs r0, r9, r0 +; CHECK-NEON-NEXT: sbcs r0, r9, r3 +; CHECK-NEON-NEXT: movwlt r6, #1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: moveq r5, r6 +; CHECK-NEON-NEXT: bl __fixsfti +; CHECK-NEON-NEXT: subs r4, r0, r9 +; CHECK-NEON-NEXT: vmov.32 d1[0], r5 +; CHECK-NEON-NEXT: sbcs r4, r1, r7 +; CHECK-NEON-NEXT: sbcs r4, r2, #0 +; CHECK-NEON-NEXT: sbcs r4, r3, #0 +; CHECK-NEON-NEXT: mov r4, #0 +; CHECK-NEON-NEXT: movwlt r4, #1 +; CHECK-NEON-NEXT: cmp r4, #0 +; CHECK-NEON-NEXT: moveq r3, r4 +; CHECK-NEON-NEXT: movne r7, r1 +; CHECK-NEON-NEXT: movne r4, r2 +; CHECK-NEON-NEXT: moveq r0, r9 +; CHECK-NEON-NEXT: rsbs r1, r0, #0 +; CHECK-NEON-NEXT: rscs r1, r7, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r9, r4 +; CHECK-NEON-NEXT: sbcs r1, r9, r3 +; CHECK-NEON-NEXT: movwlt r10, #1 +; CHECK-NEON-NEXT: cmp r10, #0 +; CHECK-NEON-NEXT: moveq r0, r10 ; CHECK-NEON-NEXT: mov r1, #-2147483648 +; CHECK-NEON-NEXT: cmp r6, #0 ; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: movgt r1, r7 -; CHECK-NEON-NEXT: cmp r7, #-2147483648 -; CHECK-NEON-NEXT: mov r0, #-2147483648 -; CHECK-NEON-NEXT: vmov.32 d1[1], r9 -; CHECK-NEON-NEXT: movls r7, r0 -; CHECK-NEON-NEXT: cmn r2, #1 -; CHECK-NEON-NEXT: movne r7, r1 +; CHECK-NEON-NEXT: moveq r11, r1 +; CHECK-NEON-NEXT: cmp r10, #0 +; CHECK-NEON-NEXT: vmov.32 d1[1], r11 +; CHECK-NEON-NEXT: moveq r7, r1 ; CHECK-NEON-NEXT: vmov.32 d0[1], r7 -; CHECK-NEON-NEXT: add sp, sp, #16 ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: add sp, sp, #4 ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-FP16-LABEL: stest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FP16-NEXT: .pad #4 -; CHECK-FP16-NEXT: sub sp, sp, #4 -; CHECK-FP16-NEXT: .vsave {d8} 
-; CHECK-FP16-NEXT: vpush {d8} -; CHECK-FP16-NEXT: .pad #16 -; CHECK-FP16-NEXT: sub sp, sp, #16 +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vorr d8, d0, d0 +; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: mov r0, r3 -; CHECK-FP16-NEXT: mov r10, #0 -; CHECK-FP16-NEXT: andne r0, r2, r0, asr #31 -; CHECK-FP16-NEXT: mov r11, r1 -; CHECK-FP16-NEXT: movmi r10, r3 -; CHECK-FP16-NEXT: and r1, r0, r10 -; CHECK-FP16-NEXT: cmn r11, #-2147483647 -; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: movlo r0, r11 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: mvn r8, #-2147483648 -; CHECK-FP16-NEXT: mov r9, #-2147483648 -; CHECK-FP16-NEXT: movmi r8, r11 -; CHECK-FP16-NEXT: orrs r2, r2, r3 -; CHECK-FP16-NEXT: moveq r8, r0 -; CHECK-FP16-NEXT: cmn r10, #1 -; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: mov r6, r3 -; CHECK-FP16-NEXT: movgt r0, r8 -; CHECK-FP16-NEXT: cmp r8, #-2147483648 -; CHECK-FP16-NEXT: movhi r9, r8 -; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: movne r9, r0 -; CHECK-FP16-NEXT: vmov.u16 r0, d8[0] -; CHECK-FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-FP16-NEXT: mvn r7, #-2147483648 -; CHECK-FP16-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-FP16-NEXT: vmov s0, r0 +; CHECK-FP16-NEXT: mov r5, r0 +; CHECK-FP16-NEXT: mvn r8, #0 +; CHECK-FP16-NEXT: subs r0, r0, r8 +; CHECK-FP16-NEXT: mvn r6, #-2147483648 +; CHECK-FP16-NEXT: sbcs r0, r1, r6 +; CHECK-FP16-NEXT: mov r10, r1 +; CHECK-FP16-NEXT: sbcs r0, r2, #0 +; CHECK-FP16-NEXT: vmov s0, r7 +; CHECK-FP16-NEXT: sbcs r0, r3, #0 +; CHECK-FP16-NEXT: mov r7, #0 +; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: mov r9, #0 +; CHECK-FP16-NEXT: movwlt r0, #1 +; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: moveq r3, r0 +; CHECK-FP16-NEXT: movne r0, r2 +; CHECK-FP16-NEXT: moveq r10, r6 +; CHECK-FP16-NEXT: moveq r5, r8 +; CHECK-FP16-NEXT: rsbs r1, r5, #0 +; CHECK-FP16-NEXT: rscs r1, r10, #-2147483648 +; CHECK-FP16-NEXT: sbcs r0, r8, r0 +; CHECK-FP16-NEXT: sbcs r0, r8, r3 +; CHECK-FP16-NEXT: movwlt r7, #1 +; CHECK-FP16-NEXT: cmp r7, #0 +; CHECK-FP16-NEXT: moveq r5, r7 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: cmn r1, #-2147483647 -; CHECK-FP16-NEXT: mvn r5, #0 -; CHECK-FP16-NEXT: movlo r5, r0 -; CHECK-FP16-NEXT: mvn r4, #0 -; CHECK-FP16-NEXT: moveq r5, r0 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: movpl r0, r4 -; CHECK-FP16-NEXT: orrs r12, r2, r3 -; CHECK-FP16-NEXT: moveq r0, r5 -; CHECK-FP16-NEXT: cmn r1, #-2147483647 -; CHECK-FP16-NEXT: mvn r5, #-2147483648 -; CHECK-FP16-NEXT: movlo r5, r1 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: movmi r7, r1 -; CHECK-FP16-NEXT: cmp r12, #0 -; CHECK-FP16-NEXT: moveq r7, r5 -; CHECK-FP16-NEXT: cmp r7, #-2147483648 -; CHECK-FP16-NEXT: mov r1, #0 -; CHECK-FP16-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-FP16-NEXT: movhi r1, r0 -; CHECK-FP16-NEXT: mov r12, #0 -; CHECK-FP16-NEXT: moveq r1, r0 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: mvn r6, #0 -; CHECK-FP16-NEXT: movmi r6, r5 -; CHECK-FP16-NEXT: cmn r11, #-2147483647 -; CHECK-FP16-NEXT: movlo r4, r5 -; CHECK-FP16-NEXT: moveq r4, r5 -; CHECK-FP16-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-FP16-NEXT: movne r4, r6 -; 
CHECK-FP16-NEXT: cmp r8, #-2147483648 -; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: movhi r6, r4 -; CHECK-FP16-NEXT: moveq r6, r4 -; CHECK-FP16-NEXT: cmn r10, #1 -; CHECK-FP16-NEXT: movle r4, r12 -; CHECK-FP16-NEXT: cmn r5, #1 -; CHECK-FP16-NEXT: moveq r4, r6 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: vmov.32 d1[0], r4 -; CHECK-FP16-NEXT: movmi r6, r3 -; CHECK-FP16-NEXT: cmn r6, #1 -; CHECK-FP16-NEXT: movle r0, r12 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: andne r3, r2, r3, asr #31 -; CHECK-FP16-NEXT: and r2, r3, r6 -; CHECK-FP16-NEXT: cmn r2, #1 -; CHECK-FP16-NEXT: moveq r0, r1 -; CHECK-FP16-NEXT: cmn r6, #1 +; CHECK-FP16-NEXT: subs r4, r0, r8 +; CHECK-FP16-NEXT: vmov.32 d1[0], r5 +; CHECK-FP16-NEXT: sbcs r4, r1, r6 +; CHECK-FP16-NEXT: sbcs r4, r2, #0 +; CHECK-FP16-NEXT: sbcs r4, r3, #0 +; CHECK-FP16-NEXT: mov r4, #0 +; CHECK-FP16-NEXT: movwlt r4, #1 +; CHECK-FP16-NEXT: cmp r4, #0 +; CHECK-FP16-NEXT: moveq r3, r4 +; CHECK-FP16-NEXT: movne r6, r1 +; CHECK-FP16-NEXT: movne r4, r2 +; CHECK-FP16-NEXT: moveq r0, r8 +; CHECK-FP16-NEXT: rsbs r1, r0, #0 +; CHECK-FP16-NEXT: rscs r1, r6, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r8, r4 +; CHECK-FP16-NEXT: sbcs r1, r8, r3 +; CHECK-FP16-NEXT: movwlt r9, #1 +; CHECK-FP16-NEXT: cmp r9, #0 +; CHECK-FP16-NEXT: moveq r0, r9 ; CHECK-FP16-NEXT: mov r1, #-2147483648 +; CHECK-FP16-NEXT: cmp r7, #0 ; CHECK-FP16-NEXT: vmov.32 d0[0], r0 -; CHECK-FP16-NEXT: movgt r1, r7 -; CHECK-FP16-NEXT: cmp r7, #-2147483648 -; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: vmov.32 d1[1], r9 -; CHECK-FP16-NEXT: movls r7, r0 -; CHECK-FP16-NEXT: cmn r2, #1 -; CHECK-FP16-NEXT: movne r7, r1 -; CHECK-FP16-NEXT: vmov.32 d0[1], r7 -; CHECK-FP16-NEXT: add sp, sp, #16 -; CHECK-FP16-NEXT: vpop {d8} -; CHECK-FP16-NEXT: add sp, sp, #4 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-FP16-NEXT: moveq r10, r1 +; CHECK-FP16-NEXT: cmp r9, #0 +; CHECK-FP16-NEXT: vmov.32 d1[1], r10 +; CHECK-FP16-NEXT: moveq r6, r1 +; CHECK-FP16-NEXT: vmov.32 d0[1], r6 +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>) @@ -4389,41 +4119,30 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: mov r7, r1 -; CHECK-NEON-NEXT: eor r1, r2, #1 -; CHECK-NEON-NEXT: subs r2, r2, #1 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: sbcs r2, r3, #0 -; CHECK-NEON-NEXT: orr r1, r1, r3 -; CHECK-NEON-NEXT: movwlo r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: moveq r7, r6 -; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: mov r6, r0 +; CHECK-NEON-NEXT: subs r0, r2, #1 ; CHECK-NEON-NEXT: vmov s0, r5 -; CHECK-NEON-NEXT: moveq r7, r1 -; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: sbcs r0, r3, #0 ; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: movne r6, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r6, r1 -; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: eor r4, r2, #1 -; CHECK-NEON-NEXT: subs r2, r2, #1 -; CHECK-NEON-NEXT: sbcs r2, r3, #0 -; CHECK-NEON-NEXT: orr r4, r4, r3 +; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: movwlo r5, #1 ; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: moveq r0, r5 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: moveq r0, r4 +; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: moveq r6, r5 +; CHECK-NEON-NEXT: bl
__fixunssfti +; CHECK-NEON-NEXT: subs r2, r2, #1 ; CHECK-NEON-NEXT: vmov.32 d1[0], r6 +; CHECK-NEON-NEXT: sbcs r2, r3, #0 +; CHECK-NEON-NEXT: movwlo r7, #1 +; CHECK-NEON-NEXT: cmp r7, #0 +; CHECK-NEON-NEXT: moveq r0, r7 ; CHECK-NEON-NEXT: cmp r5, #0 +; CHECK-NEON-NEXT: movne r5, r4 ; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: movne r5, r1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r7 -; CHECK-NEON-NEXT: moveq r5, r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r5 +; CHECK-NEON-NEXT: cmp r7, #0 +; CHECK-NEON-NEXT: vmov.32 d1[1], r5 +; CHECK-NEON-NEXT: movne r7, r1 +; CHECK-NEON-NEXT: vmov.32 d0[1], r7 ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; @@ -4432,44 +4151,33 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r5, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: mov r7, r1 -; CHECK-FP16-NEXT: eor r1, r2, #1 -; CHECK-FP16-NEXT: subs r2, r2, #1 +; CHECK-FP16-NEXT: mov r5, r0 +; CHECK-FP16-NEXT: subs r0, r2, #1 +; CHECK-FP16-NEXT: vmov s0, r7 +; CHECK-FP16-NEXT: sbcs r0, r3, #0 +; CHECK-FP16-NEXT: mov r7, #0 +; CHECK-FP16-NEXT: mov r4, r1 +; CHECK-FP16-NEXT: movwlo r7, #1 +; CHECK-FP16-NEXT: cmp r7, #0 ; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: sbcs r2, r3, #0 -; CHECK-FP16-NEXT: orr r1, r1, r3 -; CHECK-FP16-NEXT: movwlo r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r7, r6 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: vmov s0, r5 -; CHECK-FP16-NEXT: moveq r7, r1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: mov r5, #0 -; CHECK-FP16-NEXT: movne r6, r0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r6, r1 +; CHECK-FP16-NEXT: moveq r5, r7 ; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: eor r4, r2, #1 ; CHECK-FP16-NEXT: subs r2, r2, #1 +; CHECK-FP16-NEXT: vmov.32 d1[0], r5 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 -; CHECK-FP16-NEXT: orr r4, r4, r3 -; CHECK-FP16-NEXT: movwlo r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: moveq r0, r5 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: moveq r0, r4 -; CHECK-FP16-NEXT: vmov.32 d1[0], r6 -; CHECK-FP16-NEXT: cmp r5, #0 +; CHECK-FP16-NEXT: movwlo r6, #1 +; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: moveq r0, r6 +; CHECK-FP16-NEXT: cmp r7, #0 +; CHECK-FP16-NEXT: movne r7, r4 ; CHECK-FP16-NEXT: vmov.32 d0[0], r0 -; CHECK-FP16-NEXT: movne r5, r1 -; CHECK-FP16-NEXT: cmp r4, #0 +; CHECK-FP16-NEXT: cmp r6, #0 ; CHECK-FP16-NEXT: vmov.32 d1[1], r7 -; CHECK-FP16-NEXT: moveq r5, r4 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 +; CHECK-FP16-NEXT: movne r6, r1 +; CHECK-FP16-NEXT: vmov.32 d0[1], r6 ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc} entry: %conv = fptoui <2 x half> %x to <2 x i128> @@ -4481,130 +4189,100 @@ entry: define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-LABEL: ustest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} -; CHECK-NEON-NEXT: vmov r0, s0 -; CHECK-NEON-NEXT: vmov.f32 s16, s1 +; CHECK-NEON-NEXT: vmov r0, s1 +; CHECK-NEON-NEXT: vmov.f32 s16, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; 
CHECK-NEON-NEXT: mov r7, r0 ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r8, r0 -; CHECK-NEON-NEXT: eor r0, r2, #1 -; CHECK-NEON-NEXT: orr r10, r0, r3 +; CHECK-NEON-NEXT: mov r6, r0 ; CHECK-NEON-NEXT: subs r0, r2, #1 ; CHECK-NEON-NEXT: vmov s0, r7 ; CHECK-NEON-NEXT: sbcs r0, r3, #0 ; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: mov r4, r1 +; CHECK-NEON-NEXT: mov r5, r3 ; CHECK-NEON-NEXT: movwlt r7, #1 ; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r4, r7 -; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: moveq r4, r10 -; CHECK-NEON-NEXT: mov r5, r3 -; CHECK-NEON-NEXT: mov r9, #0 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: movpl r5, r9 +; CHECK-NEON-NEXT: moveq r6, r7 +; CHECK-NEON-NEXT: moveq r5, r7 ; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: movwmi r4, #0 +; CHECK-NEON-NEXT: mov r8, r1 +; CHECK-NEON-NEXT: mov r4, #0 +; CHECK-NEON-NEXT: movwmi r6, #0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: eor r6, r2, #1 ; CHECK-NEON-NEXT: subs r2, r2, #1 +; CHECK-NEON-NEXT: vmov.32 d0[0], r6 ; CHECK-NEON-NEXT: sbcs r2, r3, #0 -; CHECK-NEON-NEXT: orr r6, r6, r3 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwlt r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: moveq r0, r2 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: moveq r0, r6 +; CHECK-NEON-NEXT: movwlt r4, #1 +; CHECK-NEON-NEXT: cmp r4, #0 +; CHECK-NEON-NEXT: moveq r0, r4 +; CHECK-NEON-NEXT: moveq r3, r4 +; CHECK-NEON-NEXT: cmp r3, #0 +; CHECK-NEON-NEXT: movwmi r0, #0 +; CHECK-NEON-NEXT: cmp r4, #0 +; CHECK-NEON-NEXT: movne r4, r1 +; CHECK-NEON-NEXT: cmp r3, #0 +; CHECK-NEON-NEXT: vmov.32 d1[0], r0 +; CHECK-NEON-NEXT: movwmi r4, #0 ; CHECK-NEON-NEXT: cmp r7, #0 ; CHECK-NEON-NEXT: movne r7, r8 -; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: moveq r7, r10 ; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: movwmi r7, #0 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: movmi r9, r3 -; CHECK-NEON-NEXT: cmp r9, #0 -; CHECK-NEON-NEXT: movwmi r0, #0 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: vmov.32 d1[0], r7 -; CHECK-NEON-NEXT: movne r2, r1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: moveq r2, r6 -; CHECK-NEON-NEXT: cmp r9, #0 ; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: movwmi r2, #0 -; CHECK-NEON-NEXT: vmov.32 d0[1], r2 +; CHECK-NEON-NEXT: movwmi r7, #0 +; CHECK-NEON-NEXT: vmov.32 d0[1], r7 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} ; ; CHECK-FP16-LABEL: ustest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-FP16-NEXT: vmov.u16 r0, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r7, d0[1] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r8, r0 -; CHECK-FP16-NEXT: eor r0, r2, #1 -; CHECK-FP16-NEXT: orr r10, r0, r3 +; CHECK-FP16-NEXT: mov r6, r0 ; CHECK-FP16-NEXT: subs r0, r2, #1 +; CHECK-FP16-NEXT: vmov s0, r7 ; CHECK-FP16-NEXT: sbcs r0, r3, #0 ; CHECK-FP16-NEXT: mov r7, #0 +; CHECK-FP16-NEXT: mov r5, r3 ; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: cmp r7, #0 -; 
CHECK-FP16-NEXT: vmov s0, r6 -; CHECK-FP16-NEXT: moveq r4, r7 -; CHECK-FP16-NEXT: cmp r10, #0 -; CHECK-FP16-NEXT: moveq r4, r10 -; CHECK-FP16-NEXT: mov r5, r3 -; CHECK-FP16-NEXT: mov r9, #0 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: movpl r5, r9 +; CHECK-FP16-NEXT: moveq r6, r7 +; CHECK-FP16-NEXT: moveq r5, r7 ; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: movwmi r4, #0 +; CHECK-FP16-NEXT: mov r8, r1 +; CHECK-FP16-NEXT: mov r4, #0 +; CHECK-FP16-NEXT: movwmi r6, #0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: eor r6, r2, #1 ; CHECK-FP16-NEXT: subs r2, r2, #1 +; CHECK-FP16-NEXT: vmov.32 d0[0], r6 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 -; CHECK-FP16-NEXT: orr r6, r6, r3 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: moveq r0, r2 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r0, r6 +; CHECK-FP16-NEXT: movwlt r4, #1 +; CHECK-FP16-NEXT: cmp r4, #0 +; CHECK-FP16-NEXT: moveq r0, r4 +; CHECK-FP16-NEXT: moveq r3, r4 +; CHECK-FP16-NEXT: cmp r3, #0 +; CHECK-FP16-NEXT: movwmi r0, #0 +; CHECK-FP16-NEXT: cmp r4, #0 +; CHECK-FP16-NEXT: movne r4, r1 +; CHECK-FP16-NEXT: cmp r3, #0 +; CHECK-FP16-NEXT: vmov.32 d1[0], r0 +; CHECK-FP16-NEXT: movwmi r4, #0 ; CHECK-FP16-NEXT: cmp r7, #0 ; CHECK-FP16-NEXT: movne r7, r8 -; CHECK-FP16-NEXT: cmp r10, #0 -; CHECK-FP16-NEXT: moveq r7, r10 ; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: movwmi r7, #0 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: movmi r9, r3 -; CHECK-FP16-NEXT: cmp r9, #0 -; CHECK-FP16-NEXT: movwmi r0, #0 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: vmov.32 d1[0], r7 -; CHECK-FP16-NEXT: movne r2, r1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: vmov.32 d0[0], r0 -; CHECK-FP16-NEXT: moveq r2, r6 -; CHECK-FP16-NEXT: cmp r9, #0 ; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: movwmi r2, #0 -; CHECK-FP16-NEXT: vmov.32 d0[1], r2 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-FP16-NEXT: movwmi r7, #0 +; CHECK-FP16-NEXT: vmov.32 d0[1], r7 +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551615, i128 18446744073709551615>) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index d467ef6..ac395ba 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -1931,46 +1931,36 @@ define i32 @stest_f64i32_mm(double %x) { ; RV32IF-NEXT: .cfi_offset ra, -4 ; RV32IF-NEXT: call __fixdfdi@plt ; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: addi a4, a2, -1 -; RV32IF-NEXT: mv a3, a0 -; RV32IF-NEXT: bgez a1, .LBB27_9 +; RV32IF-NEXT: addi a3, a2, -1 +; RV32IF-NEXT: beqz a1, .LBB27_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: bgeu a0, a4, .LBB27_10 -; RV32IF-NEXT: .LBB27_2: # %entry -; RV32IF-NEXT: beqz a1, .LBB27_4 +; RV32IF-NEXT: slti a4, a1, 0 +; RV32IF-NEXT: j .LBB27_3 +; RV32IF-NEXT: .LBB27_2: +; RV32IF-NEXT: sltu a4, a0, a3 ; RV32IF-NEXT: .LBB27_3: # %entry +; RV32IF-NEXT: neg a5, a4 +; RV32IF-NEXT: and a1, a5, a1 +; RV32IF-NEXT: bnez a4, .LBB27_5 +; RV32IF-NEXT: # %bb.4: # %entry ; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: .LBB27_4: # %entry -; RV32IF-NEXT: srai a3, a1, 31 -; RV32IF-NEXT: and a1, a3, a1 -; RV32IF-NEXT: mv a3, a0 -; RV32IF-NEXT: bltz a1, .LBB27_11 -; RV32IF-NEXT: # %bb.5: # %entry -; RV32IF-NEXT: bgeu a2, a0, .LBB27_12 -; RV32IF-NEXT: .LBB27_6: # %entry -; RV32IF-NEXT: li a2, -1 -; RV32IF-NEXT: beq a1, a2, .LBB27_8 -; RV32IF-NEXT:
.LBB27_7: # %entry -; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: .LBB27_5: # %entry +; RV32IF-NEXT: li a3, -1 +; RV32IF-NEXT: beq a1, a3, .LBB27_7 +; RV32IF-NEXT: # %bb.6: # %entry +; RV32IF-NEXT: slti a1, a1, 0 +; RV32IF-NEXT: xori a1, a1, 1 +; RV32IF-NEXT: beqz a1, .LBB27_8 +; RV32IF-NEXT: j .LBB27_9 +; RV32IF-NEXT: .LBB27_7: +; RV32IF-NEXT: sltu a1, a2, a0 +; RV32IF-NEXT: bnez a1, .LBB27_9 ; RV32IF-NEXT: .LBB27_8: # %entry +; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: .LBB27_9: # %entry ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB27_9: # %entry -; RV32IF-NEXT: mv a3, a4 -; RV32IF-NEXT: bltu a0, a4, .LBB27_2 -; RV32IF-NEXT: .LBB27_10: # %entry -; RV32IF-NEXT: mv a0, a4 -; RV32IF-NEXT: bnez a1, .LBB27_3 -; RV32IF-NEXT: j .LBB27_4 -; RV32IF-NEXT: .LBB27_11: # %entry -; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: bltu a2, a0, .LBB27_6 -; RV32IF-NEXT: .LBB27_12: # %entry -; RV32IF-NEXT: lui a0, 524288 -; RV32IF-NEXT: li a2, -1 -; RV32IF-NEXT: bne a1, a2, .LBB27_7 -; RV32IF-NEXT: j .LBB27_8 ; ; RV64IF-LABEL: stest_f64i32_mm: ; RV64IF: # %bb.0: # %entry @@ -2092,12 +2082,15 @@ define i32 @ustest_f64i32_mm(double %x) { ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 ; RV32IF-NEXT: call __fixdfdi@plt -; RV32IF-NEXT: slti a2, a1, 0 -; RV32IF-NEXT: beqz a1, .LBB29_2 +; RV32IF-NEXT: bnez a1, .LBB29_2 ; RV32IF-NEXT: # %bb.1: # %entry +; RV32IF-NEXT: li a2, 1 +; RV32IF-NEXT: j .LBB29_3 +; RV32IF-NEXT: .LBB29_2: +; RV32IF-NEXT: slti a2, a1, 1 +; RV32IF-NEXT: .LBB29_3: # %entry ; RV32IF-NEXT: addi a3, a2, -1 ; RV32IF-NEXT: or a0, a3, a0 -; RV32IF-NEXT: .LBB29_2: # %entry ; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: slti a1, a1, 0 @@ -2260,46 +2253,36 @@ define i32 @stest_f16i32_mm(half %x) { ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: call __fixsfdi@plt ; RV32-NEXT: lui a2, 524288 -; RV32-NEXT: addi a4, a2, -1 -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bgez a1, .LBB33_9 +; RV32-NEXT: addi a3, a2, -1 +; RV32-NEXT: beqz a1, .LBB33_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: bgeu a0, a4, .LBB33_10 -; RV32-NEXT: .LBB33_2: # %entry -; RV32-NEXT: beqz a1, .LBB33_4 +; RV32-NEXT: slti a4, a1, 0 +; RV32-NEXT: j .LBB33_3 +; RV32-NEXT: .LBB33_2: +; RV32-NEXT: sltu a4, a0, a3 ; RV32-NEXT: .LBB33_3: # %entry +; RV32-NEXT: neg a5, a4 +; RV32-NEXT: and a1, a5, a1 +; RV32-NEXT: bnez a4, .LBB33_5 +; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: mv a0, a3 -; RV32-NEXT: .LBB33_4: # %entry -; RV32-NEXT: srai a3, a1, 31 -; RV32-NEXT: and a1, a3, a1 -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bltz a1, .LBB33_11 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: bgeu a2, a0, .LBB33_12 -; RV32-NEXT: .LBB33_6: # %entry -; RV32-NEXT: li a2, -1 -; RV32-NEXT: beq a1, a2, .LBB33_8 -; RV32-NEXT: .LBB33_7: # %entry -; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB33_5: # %entry +; RV32-NEXT: li a3, -1 +; RV32-NEXT: beq a1, a3, .LBB33_7 +; RV32-NEXT: # %bb.6: # %entry +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: xori a1, a1, 1 +; RV32-NEXT: beqz a1, .LBB33_8 +; RV32-NEXT: j .LBB33_9 +; RV32-NEXT: .LBB33_7: +; RV32-NEXT: sltu a1, a2, a0 +; RV32-NEXT: bnez a1, .LBB33_9 ; RV32-NEXT: .LBB33_8: # %entry +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: .LBB33_9: # %entry ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret -; RV32-NEXT: .LBB33_9: # %entry -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: bltu a0, a4, .LBB33_2 -; RV32-NEXT: .LBB33_10: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: 
bnez a1, .LBB33_3 -; RV32-NEXT: j .LBB33_4 -; RV32-NEXT: .LBB33_11: # %entry -; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: bltu a2, a0, .LBB33_6 -; RV32-NEXT: .LBB33_12: # %entry -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: li a2, -1 -; RV32-NEXT: bne a1, a2, .LBB33_7 -; RV32-NEXT: j .LBB33_8 ; ; RV64-LABEL: stest_f16i32_mm: ; RV64: # %bb.0: # %entry @@ -2383,12 +2366,15 @@ define i32 @ustest_f16i32_mm(half %x) { ; RV32-NEXT: fmv.x.w a0, fa0 ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: slti a2, a1, 0 -; RV32-NEXT: beqz a1, .LBB35_2 +; RV32-NEXT: bnez a1, .LBB35_2 ; RV32-NEXT: # %bb.1: # %entry +; RV32-NEXT: li a2, 1 +; RV32-NEXT: j .LBB35_3 +; RV32-NEXT: .LBB35_2: +; RV32-NEXT: slti a2, a1, 1 +; RV32-NEXT: .LBB35_3: # %entry ; RV32-NEXT: addi a3, a2, -1 ; RV32-NEXT: or a0, a3, a0 -; RV32-NEXT: .LBB35_2: # %entry ; RV32-NEXT: neg a2, a2 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: slti a1, a1, 0 @@ -2937,86 +2923,56 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt ; RV32IF-NEXT: lw a0, 20(sp) -; RV32IF-NEXT: lw t0, 8(sp) -; RV32IF-NEXT: lw a4, 12(sp) -; RV32IF-NEXT: lw a1, 16(sp) +; RV32IF-NEXT: lw a2, 16(sp) +; RV32IF-NEXT: lw a1, 12(sp) +; RV32IF-NEXT: lw a4, 8(sp) ; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: addi a6, a3, -1 -; RV32IF-NEXT: mv a2, t0 -; RV32IF-NEXT: beq a4, a6, .LBB45_2 +; RV32IF-NEXT: addi a5, a3, -1 +; RV32IF-NEXT: beq a1, a5, .LBB45_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: sltu a2, a4, a6 -; RV32IF-NEXT: addi a2, a2, -1 -; RV32IF-NEXT: or a2, a2, t0 -; RV32IF-NEXT: .LBB45_2: # %entry -; RV32IF-NEXT: or a7, a1, a0 -; RV32IF-NEXT: slti a5, a0, 0 -; RV32IF-NEXT: bnez a7, .LBB45_16 -; RV32IF-NEXT: # %bb.3: # %entry -; RV32IF-NEXT: mv t0, a4 -; RV32IF-NEXT: bgez a0, .LBB45_17 +; RV32IF-NEXT: sltu a6, a1, a5 +; RV32IF-NEXT: or a7, a2, a0 +; RV32IF-NEXT: bnez a7, .LBB45_3 +; RV32IF-NEXT: j .LBB45_4 +; RV32IF-NEXT: .LBB45_2: +; RV32IF-NEXT: sltiu a6, a4, -1 +; RV32IF-NEXT: or a7, a2, a0 +; RV32IF-NEXT: beqz a7, .LBB45_4 +; RV32IF-NEXT: .LBB45_3: # %entry +; RV32IF-NEXT: slti a6, a0, 0 ; RV32IF-NEXT: .LBB45_4: # %entry -; RV32IF-NEXT: bgeu a4, a6, .LBB45_18 -; RV32IF-NEXT: .LBB45_5: # %entry -; RV32IF-NEXT: beqz a7, .LBB45_7 +; RV32IF-NEXT: neg a7, a6 +; RV32IF-NEXT: addi t0, a6, -1 +; RV32IF-NEXT: bnez a6, .LBB45_6 +; RV32IF-NEXT: # %bb.5: # %entry +; RV32IF-NEXT: mv a1, a5 ; RV32IF-NEXT: .LBB45_6: # %entry -; RV32IF-NEXT: mv a4, t0 -; RV32IF-NEXT: .LBB45_7: # %entry -; RV32IF-NEXT: srai a6, a0, 31 -; RV32IF-NEXT: and a1, a6, a1 -; RV32IF-NEXT: seqz a6, a0 -; RV32IF-NEXT: neg a5, a5 -; RV32IF-NEXT: and a5, a5, a0 -; RV32IF-NEXT: addi a6, a6, -1 -; RV32IF-NEXT: mv a0, a4 -; RV32IF-NEXT: bgez a5, .LBB45_9 -; RV32IF-NEXT: # %bb.8: # %entry -; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: or a4, t0, a4 +; RV32IF-NEXT: and a5, a7, a0 +; RV32IF-NEXT: and a2, a7, a2 +; RV32IF-NEXT: beq a1, a3, .LBB45_8 +; RV32IF-NEXT: # %bb.7: # %entry +; RV32IF-NEXT: sltu a0, a3, a1 +; RV32IF-NEXT: j .LBB45_9 +; RV32IF-NEXT: .LBB45_8: +; RV32IF-NEXT: snez a0, a4 ; RV32IF-NEXT: .LBB45_9: # %entry -; RV32IF-NEXT: and a6, a6, a1 -; RV32IF-NEXT: mv a1, a4 -; RV32IF-NEXT: bltu a3, a4, .LBB45_11 +; RV32IF-NEXT: and a2, a2, a5 +; RV32IF-NEXT: li a3, -1 +; RV32IF-NEXT: beq a2, a3, .LBB45_11 ; RV32IF-NEXT: # %bb.10: # %entry -; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: slti a0, a5, 0 +; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: .LBB45_11: # %entry -; RV32IF-NEXT: and a6, a6, a5 -; RV32IF-NEXT: li a7, -1 -; 
RV32IF-NEXT: bne a6, a7, .LBB45_19 +; RV32IF-NEXT: bnez a0, .LBB45_13 ; RV32IF-NEXT: # %bb.12: # %entry -; RV32IF-NEXT: mv a0, a2 -; RV32IF-NEXT: bne a4, a3, .LBB45_20 +; RV32IF-NEXT: lui a1, 524288 ; RV32IF-NEXT: .LBB45_13: # %entry -; RV32IF-NEXT: beq a6, a7, .LBB45_15 -; RV32IF-NEXT: .LBB45_14: # %entry -; RV32IF-NEXT: slti a0, a5, 0 -; RV32IF-NEXT: addi a0, a0, -1 -; RV32IF-NEXT: and a0, a0, a2 -; RV32IF-NEXT: .LBB45_15: # %entry +; RV32IF-NEXT: neg a0, a0 +; RV32IF-NEXT: and a0, a0, a4 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB45_16: # %entry -; RV32IF-NEXT: addi a2, a5, -1 -; RV32IF-NEXT: or a2, a2, t0 -; RV32IF-NEXT: mv t0, a4 -; RV32IF-NEXT: bltz a0, .LBB45_4 -; RV32IF-NEXT: .LBB45_17: # %entry -; RV32IF-NEXT: mv t0, a6 -; RV32IF-NEXT: bltu a4, a6, .LBB45_5 -; RV32IF-NEXT: .LBB45_18: # %entry -; RV32IF-NEXT: mv a4, a6 -; RV32IF-NEXT: bnez a7, .LBB45_6 -; RV32IF-NEXT: j .LBB45_7 -; RV32IF-NEXT: .LBB45_19: # %entry -; RV32IF-NEXT: mv a1, a0 -; RV32IF-NEXT: mv a0, a2 -; RV32IF-NEXT: beq a4, a3, .LBB45_13 -; RV32IF-NEXT: .LBB45_20: # %entry -; RV32IF-NEXT: sltu a0, a3, a4 -; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: and a0, a0, a2 -; RV32IF-NEXT: bne a6, a7, .LBB45_14 -; RV32IF-NEXT: j .LBB45_15 ; ; RV64IF-LABEL: stest_f64i64_mm: ; RV64IF: # %bb.0: # %entry @@ -3026,45 +2982,36 @@ define i64 @stest_f64i64_mm(double %x) { ; RV64IF-NEXT: .cfi_offset ra, -8 ; RV64IF-NEXT: call __fixdfti@plt ; RV64IF-NEXT: li a2, -1 -; RV64IF-NEXT: srli a4, a2, 1 -; RV64IF-NEXT: mv a3, a0 -; RV64IF-NEXT: bgez a1, .LBB45_9 +; RV64IF-NEXT: srli a3, a2, 1 +; RV64IF-NEXT: beqz a1, .LBB45_2 ; RV64IF-NEXT: # %bb.1: # %entry -; RV64IF-NEXT: bgeu a0, a4, .LBB45_10 -; RV64IF-NEXT: .LBB45_2: # %entry -; RV64IF-NEXT: beqz a1, .LBB45_4 +; RV64IF-NEXT: slti a4, a1, 0 +; RV64IF-NEXT: j .LBB45_3 +; RV64IF-NEXT: .LBB45_2: +; RV64IF-NEXT: sltu a4, a0, a3 ; RV64IF-NEXT: .LBB45_3: # %entry +; RV64IF-NEXT: neg a5, a4 +; RV64IF-NEXT: and a5, a5, a1 +; RV64IF-NEXT: bnez a4, .LBB45_5 +; RV64IF-NEXT: # %bb.4: # %entry ; RV64IF-NEXT: mv a0, a3 -; RV64IF-NEXT: .LBB45_4: # %entry -; RV64IF-NEXT: srai a3, a1, 63 -; RV64IF-NEXT: and a1, a3, a1 -; RV64IF-NEXT: slli a4, a2, 63 -; RV64IF-NEXT: mv a3, a0 -; RV64IF-NEXT: bltz a1, .LBB45_11 -; RV64IF-NEXT: # %bb.5: # %entry -; RV64IF-NEXT: bgeu a4, a0, .LBB45_12 -; RV64IF-NEXT: .LBB45_6: # %entry -; RV64IF-NEXT: beq a1, a2, .LBB45_8 -; RV64IF-NEXT: .LBB45_7: # %entry -; RV64IF-NEXT: mv a0, a3 +; RV64IF-NEXT: .LBB45_5: # %entry +; RV64IF-NEXT: slli a1, a2, 63 +; RV64IF-NEXT: beq a5, a2, .LBB45_7 +; RV64IF-NEXT: # %bb.6: # %entry +; RV64IF-NEXT: slti a2, a5, 0 +; RV64IF-NEXT: xori a2, a2, 1 +; RV64IF-NEXT: beqz a2, .LBB45_8 +; RV64IF-NEXT: j .LBB45_9 +; RV64IF-NEXT: .LBB45_7: +; RV64IF-NEXT: sltu a2, a1, a0 +; RV64IF-NEXT: bnez a2, .LBB45_9 ; RV64IF-NEXT: .LBB45_8: # %entry +; RV64IF-NEXT: mv a0, a1 +; RV64IF-NEXT: .LBB45_9: # %entry ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret -; RV64IF-NEXT: .LBB45_9: # %entry -; RV64IF-NEXT: mv a3, a4 -; RV64IF-NEXT: bltu a0, a4, .LBB45_2 -; RV64IF-NEXT: .LBB45_10: # %entry -; RV64IF-NEXT: mv a0, a4 -; RV64IF-NEXT: bnez a1, .LBB45_3 -; RV64IF-NEXT: j .LBB45_4 -; RV64IF-NEXT: .LBB45_11: # %entry -; RV64IF-NEXT: mv a3, a4 -; RV64IF-NEXT: bltu a4, a0, .LBB45_6 -; RV64IF-NEXT: .LBB45_12: # %entry -; RV64IF-NEXT: mv a0, a4 -; RV64IF-NEXT: bne a1, a2, .LBB45_7 -; RV64IF-NEXT: j .LBB45_8 ; ; RV32IFD-LABEL: stest_f64i64_mm: ; 
RV32IFD: # %bb.0: # %entry @@ -3075,86 +3022,56 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt ; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: lw t0, 8(sp) -; RV32IFD-NEXT: lw a4, 12(sp) -; RV32IFD-NEXT: lw a1, 16(sp) +; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: lw a4, 8(sp) ; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: addi a6, a3, -1 -; RV32IFD-NEXT: mv a2, t0 -; RV32IFD-NEXT: beq a4, a6, .LBB45_2 +; RV32IFD-NEXT: addi a5, a3, -1 +; RV32IFD-NEXT: beq a1, a5, .LBB45_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: sltu a2, a4, a6 -; RV32IFD-NEXT: addi a2, a2, -1 -; RV32IFD-NEXT: or a2, a2, t0 -; RV32IFD-NEXT: .LBB45_2: # %entry -; RV32IFD-NEXT: or a7, a1, a0 -; RV32IFD-NEXT: slti a5, a0, 0 -; RV32IFD-NEXT: bnez a7, .LBB45_16 -; RV32IFD-NEXT: # %bb.3: # %entry -; RV32IFD-NEXT: mv t0, a4 -; RV32IFD-NEXT: bgez a0, .LBB45_17 +; RV32IFD-NEXT: sltu a6, a1, a5 +; RV32IFD-NEXT: or a7, a2, a0 +; RV32IFD-NEXT: bnez a7, .LBB45_3 +; RV32IFD-NEXT: j .LBB45_4 +; RV32IFD-NEXT: .LBB45_2: +; RV32IFD-NEXT: sltiu a6, a4, -1 +; RV32IFD-NEXT: or a7, a2, a0 +; RV32IFD-NEXT: beqz a7, .LBB45_4 +; RV32IFD-NEXT: .LBB45_3: # %entry +; RV32IFD-NEXT: slti a6, a0, 0 ; RV32IFD-NEXT: .LBB45_4: # %entry -; RV32IFD-NEXT: bgeu a4, a6, .LBB45_18 -; RV32IFD-NEXT: .LBB45_5: # %entry -; RV32IFD-NEXT: beqz a7, .LBB45_7 +; RV32IFD-NEXT: neg a7, a6 +; RV32IFD-NEXT: addi t0, a6, -1 +; RV32IFD-NEXT: bnez a6, .LBB45_6 +; RV32IFD-NEXT: # %bb.5: # %entry +; RV32IFD-NEXT: mv a1, a5 ; RV32IFD-NEXT: .LBB45_6: # %entry -; RV32IFD-NEXT: mv a4, t0 -; RV32IFD-NEXT: .LBB45_7: # %entry -; RV32IFD-NEXT: srai a6, a0, 31 -; RV32IFD-NEXT: and a1, a6, a1 -; RV32IFD-NEXT: seqz a6, a0 -; RV32IFD-NEXT: neg a5, a5 -; RV32IFD-NEXT: and a5, a5, a0 -; RV32IFD-NEXT: addi a6, a6, -1 -; RV32IFD-NEXT: mv a0, a4 -; RV32IFD-NEXT: bgez a5, .LBB45_9 -; RV32IFD-NEXT: # %bb.8: # %entry -; RV32IFD-NEXT: lui a0, 524288 +; RV32IFD-NEXT: or a4, t0, a4 +; RV32IFD-NEXT: and a5, a7, a0 +; RV32IFD-NEXT: and a2, a7, a2 +; RV32IFD-NEXT: beq a1, a3, .LBB45_8 +; RV32IFD-NEXT: # %bb.7: # %entry +; RV32IFD-NEXT: sltu a0, a3, a1 +; RV32IFD-NEXT: j .LBB45_9 +; RV32IFD-NEXT: .LBB45_8: +; RV32IFD-NEXT: snez a0, a4 ; RV32IFD-NEXT: .LBB45_9: # %entry -; RV32IFD-NEXT: and a6, a6, a1 -; RV32IFD-NEXT: mv a1, a4 -; RV32IFD-NEXT: bltu a3, a4, .LBB45_11 +; RV32IFD-NEXT: and a2, a2, a5 +; RV32IFD-NEXT: li a3, -1 +; RV32IFD-NEXT: beq a2, a3, .LBB45_11 ; RV32IFD-NEXT: # %bb.10: # %entry -; RV32IFD-NEXT: lui a1, 524288 +; RV32IFD-NEXT: slti a0, a5, 0 +; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: .LBB45_11: # %entry -; RV32IFD-NEXT: and a6, a6, a5 -; RV32IFD-NEXT: li a7, -1 -; RV32IFD-NEXT: bne a6, a7, .LBB45_19 +; RV32IFD-NEXT: bnez a0, .LBB45_13 ; RV32IFD-NEXT: # %bb.12: # %entry -; RV32IFD-NEXT: mv a0, a2 -; RV32IFD-NEXT: bne a4, a3, .LBB45_20 +; RV32IFD-NEXT: lui a1, 524288 ; RV32IFD-NEXT: .LBB45_13: # %entry -; RV32IFD-NEXT: beq a6, a7, .LBB45_15 -; RV32IFD-NEXT: .LBB45_14: # %entry -; RV32IFD-NEXT: slti a0, a5, 0 -; RV32IFD-NEXT: addi a0, a0, -1 -; RV32IFD-NEXT: and a0, a0, a2 -; RV32IFD-NEXT: .LBB45_15: # %entry +; RV32IFD-NEXT: neg a0, a0 +; RV32IFD-NEXT: and a0, a0, a4 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret -; RV32IFD-NEXT: .LBB45_16: # %entry -; RV32IFD-NEXT: addi a2, a5, -1 -; RV32IFD-NEXT: or a2, a2, t0 -; RV32IFD-NEXT: mv t0, a4 -; RV32IFD-NEXT: bltz a0, .LBB45_4 -; RV32IFD-NEXT: .LBB45_17: # %entry -; RV32IFD-NEXT: 
mv t0, a6 -; RV32IFD-NEXT: bltu a4, a6, .LBB45_5 -; RV32IFD-NEXT: .LBB45_18: # %entry -; RV32IFD-NEXT: mv a4, a6 -; RV32IFD-NEXT: bnez a7, .LBB45_6 -; RV32IFD-NEXT: j .LBB45_7 -; RV32IFD-NEXT: .LBB45_19: # %entry -; RV32IFD-NEXT: mv a1, a0 -; RV32IFD-NEXT: mv a0, a2 -; RV32IFD-NEXT: beq a4, a3, .LBB45_13 -; RV32IFD-NEXT: .LBB45_20: # %entry -; RV32IFD-NEXT: sltu a0, a3, a4 -; RV32IFD-NEXT: neg a0, a0 -; RV32IFD-NEXT: and a0, a0, a2 -; RV32IFD-NEXT: bne a6, a7, .LBB45_14 -; RV32IFD-NEXT: j .LBB45_15 ; ; RV64IFD-LABEL: stest_f64i64_mm: ; RV64IFD: # %bb.0: # %entry @@ -3188,15 +3105,14 @@ define i64 @utest_f64i64_mm(double %x) { ; RV32IF-NEXT: lw a2, 12(sp) ; RV32IF-NEXT: lw a3, 8(sp) ; RV32IF-NEXT: or a4, a1, a0 -; RV32IF-NEXT: snez a4, a4 -; RV32IF-NEXT: addi a4, a4, -1 -; RV32IF-NEXT: and a3, a4, a3 +; RV32IF-NEXT: seqz a4, a4 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: or a0, a0, a1 ; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi a1, a0, -1 +; RV32IF-NEXT: addi a0, a0, -1 +; RV32IF-NEXT: and a0, a0, a4 +; RV32IF-NEXT: neg a1, a0 ; RV32IF-NEXT: and a0, a1, a3 -; RV32IF-NEXT: and a2, a4, a2 ; RV32IF-NEXT: and a1, a1, a2 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 @@ -3209,11 +3125,7 @@ define i64 @utest_f64i64_mm(double %x) { ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixunsdfti@plt -; RV64-NEXT: snez a2, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a0, a2, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: snez a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -3233,15 +3145,14 @@ define i64 @utest_f64i64_mm(double %x) { ; RV32IFD-NEXT: lw a2, 12(sp) ; RV32IFD-NEXT: lw a3, 8(sp) ; RV32IFD-NEXT: or a4, a1, a0 -; RV32IFD-NEXT: snez a4, a4 -; RV32IFD-NEXT: addi a4, a4, -1 -; RV32IFD-NEXT: and a3, a4, a3 +; RV32IFD-NEXT: seqz a4, a4 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: addi a1, a0, -1 +; RV32IFD-NEXT: addi a0, a0, -1 +; RV32IFD-NEXT: and a0, a0, a4 +; RV32IFD-NEXT: neg a1, a0 ; RV32IFD-NEXT: and a0, a1, a3 -; RV32IFD-NEXT: and a2, a4, a2 ; RV32IFD-NEXT: and a1, a1, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 @@ -3265,32 +3176,29 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt ; RV32IF-NEXT: lw a0, 8(sp) -; RV32IF-NEXT: lw a3, 12(sp) -; RV32IF-NEXT: lw a1, 20(sp) +; RV32IF-NEXT: lw a1, 12(sp) +; RV32IF-NEXT: lw a2, 20(sp) ; RV32IF-NEXT: lw a4, 16(sp) -; RV32IF-NEXT: slti a2, a1, 0 -; RV32IF-NEXT: beqz a1, .LBB47_2 +; RV32IF-NEXT: beqz a2, .LBB47_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: mv a5, a2 +; RV32IF-NEXT: slti a3, a2, 0 ; RV32IF-NEXT: j .LBB47_3 ; RV32IF-NEXT: .LBB47_2: -; RV32IF-NEXT: seqz a5, a4 +; RV32IF-NEXT: seqz a3, a4 ; RV32IF-NEXT: .LBB47_3: # %entry -; RV32IF-NEXT: neg a5, a5 -; RV32IF-NEXT: and a3, a5, a3 ; RV32IF-NEXT: xori a4, a4, 1 -; RV32IF-NEXT: or a4, a4, a1 +; RV32IF-NEXT: or a4, a4, a2 ; RV32IF-NEXT: seqz a4, a4 ; RV32IF-NEXT: addi a4, a4, -1 ; RV32IF-NEXT: and a3, a4, a3 -; RV32IF-NEXT: and a0, a5, a0 -; RV32IF-NEXT: and a0, a4, a0 -; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: and a1, a3, a1 +; RV32IF-NEXT: and a0, a3, a0 +; RV32IF-NEXT: and a2, a3, a2 +; RV32IF-NEXT: slti a2, a2, 0 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: and a1, a2, a1 -; 
RV32IF-NEXT: slti a1, a1, 0 -; RV32IF-NEXT: addi a1, a1, -1 -; RV32IF-NEXT: and a0, a1, a0 -; RV32IF-NEXT: and a1, a1, a3 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -3307,12 +3215,8 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB47_2: # %entry -; RV64-NEXT: slti a3, a1, 1 -; RV64-NEXT: neg a3, a3 -; RV64-NEXT: and a0, a3, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: seqz a1, a1 -; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: slti a1, a1, 1 +; RV64-NEXT: neg a1, a1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: slti a1, a2, 0 ; RV64-NEXT: addi a1, a1, -1 @@ -3330,32 +3234,29 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt ; RV32IFD-NEXT: lw a0, 8(sp) -; RV32IFD-NEXT: lw a3, 12(sp) -; RV32IFD-NEXT: lw a1, 20(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: lw a2, 20(sp) ; RV32IFD-NEXT: lw a4, 16(sp) -; RV32IFD-NEXT: slti a2, a1, 0 -; RV32IFD-NEXT: beqz a1, .LBB47_2 +; RV32IFD-NEXT: beqz a2, .LBB47_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: mv a5, a2 +; RV32IFD-NEXT: slti a3, a2, 0 ; RV32IFD-NEXT: j .LBB47_3 ; RV32IFD-NEXT: .LBB47_2: -; RV32IFD-NEXT: seqz a5, a4 +; RV32IFD-NEXT: seqz a3, a4 ; RV32IFD-NEXT: .LBB47_3: # %entry -; RV32IFD-NEXT: neg a5, a5 -; RV32IFD-NEXT: and a3, a5, a3 ; RV32IFD-NEXT: xori a4, a4, 1 -; RV32IFD-NEXT: or a4, a4, a1 +; RV32IFD-NEXT: or a4, a4, a2 ; RV32IFD-NEXT: seqz a4, a4 ; RV32IFD-NEXT: addi a4, a4, -1 ; RV32IFD-NEXT: and a3, a4, a3 -; RV32IFD-NEXT: and a0, a5, a0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: neg a2, a2 +; RV32IFD-NEXT: neg a3, a3 +; RV32IFD-NEXT: and a1, a3, a1 +; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: and a2, a3, a2 +; RV32IFD-NEXT: slti a2, a2, 0 +; RV32IFD-NEXT: addi a2, a2, -1 +; RV32IFD-NEXT: and a0, a2, a0 ; RV32IFD-NEXT: and a1, a2, a1 -; RV32IFD-NEXT: slti a1, a1, 0 -; RV32IFD-NEXT: addi a1, a1, -1 -; RV32IFD-NEXT: and a0, a1, a0 -; RV32IFD-NEXT: and a1, a1, a3 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -3377,86 +3278,56 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw t0, 8(sp) -; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: addi a6, a3, -1 -; RV32-NEXT: mv a2, t0 -; RV32-NEXT: beq a4, a6, .LBB48_2 +; RV32-NEXT: addi a5, a3, -1 +; RV32-NEXT: beq a1, a5, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a2, a4, a6 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: or a2, a2, t0 -; RV32-NEXT: .LBB48_2: # %entry -; RV32-NEXT: or a7, a1, a0 -; RV32-NEXT: slti a5, a0, 0 -; RV32-NEXT: bnez a7, .LBB48_16 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: mv t0, a4 -; RV32-NEXT: bgez a0, .LBB48_17 +; RV32-NEXT: sltu a6, a1, a5 +; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: bnez a7, .LBB48_3 +; RV32-NEXT: j .LBB48_4 +; RV32-NEXT: .LBB48_2: +; RV32-NEXT: sltiu a6, a4, -1 +; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: beqz a7, .LBB48_4 +; RV32-NEXT: .LBB48_3: # %entry +; RV32-NEXT: slti a6, a0, 0 ; RV32-NEXT: .LBB48_4: # %entry -; RV32-NEXT: bgeu a4, a6, .LBB48_18 -; RV32-NEXT: .LBB48_5: # %entry -; RV32-NEXT: beqz a7, .LBB48_7 +; RV32-NEXT: neg a7, a6 +; RV32-NEXT: addi t0, a6, -1 +; RV32-NEXT: bnez a6, .LBB48_6 +; RV32-NEXT: # %bb.5: # %entry +; RV32-NEXT: mv 
a1, a5 ; RV32-NEXT: .LBB48_6: # %entry -; RV32-NEXT: mv a4, t0 -; RV32-NEXT: .LBB48_7: # %entry -; RV32-NEXT: srai a6, a0, 31 -; RV32-NEXT: and a1, a6, a1 -; RV32-NEXT: seqz a6, a0 -; RV32-NEXT: neg a5, a5 -; RV32-NEXT: and a5, a5, a0 -; RV32-NEXT: addi a6, a6, -1 -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: bgez a5, .LBB48_9 -; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: or a4, t0, a4 +; RV32-NEXT: and a5, a7, a0 +; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: beq a1, a3, .LBB48_8 +; RV32-NEXT: # %bb.7: # %entry +; RV32-NEXT: sltu a0, a3, a1 +; RV32-NEXT: j .LBB48_9 +; RV32-NEXT: .LBB48_8: +; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB48_9: # %entry -; RV32-NEXT: and a6, a6, a1 -; RV32-NEXT: mv a1, a4 -; RV32-NEXT: bltu a3, a4, .LBB48_11 +; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: li a3, -1 +; RV32-NEXT: beq a2, a3, .LBB48_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB48_11: # %entry -; RV32-NEXT: and a6, a6, a5 -; RV32-NEXT: li a7, -1 -; RV32-NEXT: bne a6, a7, .LBB48_19 +; RV32-NEXT: bnez a0, .LBB48_13 ; RV32-NEXT: # %bb.12: # %entry -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bne a4, a3, .LBB48_20 +; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB48_13: # %entry -; RV32-NEXT: beq a6, a7, .LBB48_15 -; RV32-NEXT: .LBB48_14: # %entry -; RV32-NEXT: slti a0, a5, 0 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: .LBB48_15: # %entry +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: and a0, a0, a4 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret -; RV32-NEXT: .LBB48_16: # %entry -; RV32-NEXT: addi a2, a5, -1 -; RV32-NEXT: or a2, a2, t0 -; RV32-NEXT: mv t0, a4 -; RV32-NEXT: bltz a0, .LBB48_4 -; RV32-NEXT: .LBB48_17: # %entry -; RV32-NEXT: mv t0, a6 -; RV32-NEXT: bltu a4, a6, .LBB48_5 -; RV32-NEXT: .LBB48_18: # %entry -; RV32-NEXT: mv a4, a6 -; RV32-NEXT: bnez a7, .LBB48_6 -; RV32-NEXT: j .LBB48_7 -; RV32-NEXT: .LBB48_19: # %entry -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: beq a4, a3, .LBB48_13 -; RV32-NEXT: .LBB48_20: # %entry -; RV32-NEXT: sltu a0, a3, a4 -; RV32-NEXT: neg a0, a0 -; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: bne a6, a7, .LBB48_14 -; RV32-NEXT: j .LBB48_15 ; ; RV64-LABEL: stest_f32i64_mm: ; RV64: # %bb.0: # %entry @@ -3488,15 +3359,14 @@ define i64 @utest_f32i64_mm(float %x) { ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) ; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: snez a4, a4 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: seqz a4, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a1, a0, -1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: neg a1, a0 ; RV32-NEXT: and a0, a1, a3 -; RV32-NEXT: and a2, a4, a2 ; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 @@ -3509,11 +3379,7 @@ define i64 @utest_f32i64_mm(float %x) { ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixunssfti@plt -; RV64-NEXT: snez a2, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a0, a2, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: snez a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -3536,32 +3402,29 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a0, 8(sp) -; 
RV32-NEXT: lw a3, 12(sp) -; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a4, 16(sp) -; RV32-NEXT: slti a2, a1, 0 -; RV32-NEXT: beqz a1, .LBB50_2 +; RV32-NEXT: beqz a2, .LBB50_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: mv a5, a2 +; RV32-NEXT: slti a3, a2, 0 ; RV32-NEXT: j .LBB50_3 ; RV32-NEXT: .LBB50_2: -; RV32-NEXT: seqz a5, a4 +; RV32-NEXT: seqz a3, a4 ; RV32-NEXT: .LBB50_3: # %entry -; RV32-NEXT: neg a5, a5 -; RV32-NEXT: and a3, a5, a3 ; RV32-NEXT: xori a4, a4, 1 -; RV32-NEXT: or a4, a4, a1 +; RV32-NEXT: or a4, a4, a2 ; RV32-NEXT: seqz a4, a4 ; RV32-NEXT: addi a4, a4, -1 ; RV32-NEXT: and a3, a4, a3 -; RV32-NEXT: and a0, a5, a0 -; RV32-NEXT: and a0, a4, a0 -; RV32-NEXT: neg a2, a2 +; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: and a0, a3, a0 +; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: slti a2, a2, 0 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: slti a1, a1, 0 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: and a1, a1, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -3578,12 +3441,8 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB50_2: # %entry -; RV64-NEXT: slti a3, a1, 1 -; RV64-NEXT: neg a3, a3 -; RV64-NEXT: and a0, a3, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: seqz a1, a1 -; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: slti a1, a1, 1 +; RV64-NEXT: neg a1, a1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: slti a1, a2, 0 ; RV64-NEXT: addi a1, a1, -1 @@ -3611,86 +3470,56 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw t0, 8(sp) -; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a4, 8(sp) ; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: addi a6, a3, -1 -; RV32-NEXT: mv a2, t0 -; RV32-NEXT: beq a4, a6, .LBB51_2 +; RV32-NEXT: addi a5, a3, -1 +; RV32-NEXT: beq a1, a5, .LBB51_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a2, a4, a6 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: or a2, a2, t0 -; RV32-NEXT: .LBB51_2: # %entry -; RV32-NEXT: or a7, a1, a0 -; RV32-NEXT: slti a5, a0, 0 -; RV32-NEXT: bnez a7, .LBB51_16 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: mv t0, a4 -; RV32-NEXT: bgez a0, .LBB51_17 +; RV32-NEXT: sltu a6, a1, a5 +; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: bnez a7, .LBB51_3 +; RV32-NEXT: j .LBB51_4 +; RV32-NEXT: .LBB51_2: +; RV32-NEXT: sltiu a6, a4, -1 +; RV32-NEXT: or a7, a2, a0 +; RV32-NEXT: beqz a7, .LBB51_4 +; RV32-NEXT: .LBB51_3: # %entry +; RV32-NEXT: slti a6, a0, 0 ; RV32-NEXT: .LBB51_4: # %entry -; RV32-NEXT: bgeu a4, a6, .LBB51_18 -; RV32-NEXT: .LBB51_5: # %entry -; RV32-NEXT: beqz a7, .LBB51_7 +; RV32-NEXT: neg a7, a6 +; RV32-NEXT: addi t0, a6, -1 +; RV32-NEXT: bnez a6, .LBB51_6 +; RV32-NEXT: # %bb.5: # %entry +; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB51_6: # %entry -; RV32-NEXT: mv a4, t0 -; RV32-NEXT: .LBB51_7: # %entry -; RV32-NEXT: srai a6, a0, 31 -; RV32-NEXT: and a1, a6, a1 -; RV32-NEXT: seqz a6, a0 -; RV32-NEXT: neg a5, a5 -; RV32-NEXT: and a5, a5, a0 -; RV32-NEXT: addi a6, a6, -1 -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: bgez a5, .LBB51_9 -; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: or a4, t0, a4 +; RV32-NEXT: and a5, a7, a0 +; RV32-NEXT: and a2, a7, a2 +; RV32-NEXT: beq a1, a3, .LBB51_8 +; RV32-NEXT: # 
%bb.7: # %entry +; RV32-NEXT: sltu a0, a3, a1 +; RV32-NEXT: j .LBB51_9 +; RV32-NEXT: .LBB51_8: +; RV32-NEXT: snez a0, a4 ; RV32-NEXT: .LBB51_9: # %entry -; RV32-NEXT: and a6, a6, a1 -; RV32-NEXT: mv a1, a4 -; RV32-NEXT: bltu a3, a4, .LBB51_11 +; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: li a3, -1 +; RV32-NEXT: beq a2, a3, .LBB51_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB51_11: # %entry -; RV32-NEXT: and a6, a6, a5 -; RV32-NEXT: li a7, -1 -; RV32-NEXT: bne a6, a7, .LBB51_19 +; RV32-NEXT: bnez a0, .LBB51_13 ; RV32-NEXT: # %bb.12: # %entry -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bne a4, a3, .LBB51_20 +; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB51_13: # %entry -; RV32-NEXT: beq a6, a7, .LBB51_15 -; RV32-NEXT: .LBB51_14: # %entry -; RV32-NEXT: slti a0, a5, 0 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: .LBB51_15: # %entry +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: and a0, a0, a4 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret -; RV32-NEXT: .LBB51_16: # %entry -; RV32-NEXT: addi a2, a5, -1 -; RV32-NEXT: or a2, a2, t0 -; RV32-NEXT: mv t0, a4 -; RV32-NEXT: bltz a0, .LBB51_4 -; RV32-NEXT: .LBB51_17: # %entry -; RV32-NEXT: mv t0, a6 -; RV32-NEXT: bltu a4, a6, .LBB51_5 -; RV32-NEXT: .LBB51_18: # %entry -; RV32-NEXT: mv a4, a6 -; RV32-NEXT: bnez a7, .LBB51_6 -; RV32-NEXT: j .LBB51_7 -; RV32-NEXT: .LBB51_19: # %entry -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: beq a4, a3, .LBB51_13 -; RV32-NEXT: .LBB51_20: # %entry -; RV32-NEXT: sltu a0, a3, a4 -; RV32-NEXT: neg a0, a0 -; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: bne a6, a7, .LBB51_14 -; RV32-NEXT: j .LBB51_15 ; ; RV64-LABEL: stest_f16i64_mm: ; RV64: # %bb.0: # %entry @@ -3702,45 +3531,36 @@ define i64 @stest_f16i64_mm(half %x) { ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixsfti@plt ; RV64-NEXT: li a2, -1 -; RV64-NEXT: srli a4, a2, 1 -; RV64-NEXT: mv a3, a0 -; RV64-NEXT: bgez a1, .LBB51_9 +; RV64-NEXT: srli a3, a2, 1 +; RV64-NEXT: beqz a1, .LBB51_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: bgeu a0, a4, .LBB51_10 -; RV64-NEXT: .LBB51_2: # %entry -; RV64-NEXT: beqz a1, .LBB51_4 +; RV64-NEXT: slti a4, a1, 0 +; RV64-NEXT: j .LBB51_3 +; RV64-NEXT: .LBB51_2: +; RV64-NEXT: sltu a4, a0, a3 ; RV64-NEXT: .LBB51_3: # %entry +; RV64-NEXT: neg a5, a4 +; RV64-NEXT: and a5, a5, a1 +; RV64-NEXT: bnez a4, .LBB51_5 +; RV64-NEXT: # %bb.4: # %entry ; RV64-NEXT: mv a0, a3 -; RV64-NEXT: .LBB51_4: # %entry -; RV64-NEXT: srai a3, a1, 63 -; RV64-NEXT: and a1, a3, a1 -; RV64-NEXT: slli a4, a2, 63 -; RV64-NEXT: mv a3, a0 -; RV64-NEXT: bltz a1, .LBB51_11 -; RV64-NEXT: # %bb.5: # %entry -; RV64-NEXT: bgeu a4, a0, .LBB51_12 -; RV64-NEXT: .LBB51_6: # %entry -; RV64-NEXT: beq a1, a2, .LBB51_8 -; RV64-NEXT: .LBB51_7: # %entry -; RV64-NEXT: mv a0, a3 +; RV64-NEXT: .LBB51_5: # %entry +; RV64-NEXT: slli a1, a2, 63 +; RV64-NEXT: beq a5, a2, .LBB51_7 +; RV64-NEXT: # %bb.6: # %entry +; RV64-NEXT: slti a2, a5, 0 +; RV64-NEXT: xori a2, a2, 1 +; RV64-NEXT: beqz a2, .LBB51_8 +; RV64-NEXT: j .LBB51_9 +; RV64-NEXT: .LBB51_7: +; RV64-NEXT: sltu a2, a1, a0 +; RV64-NEXT: bnez a2, .LBB51_9 ; RV64-NEXT: .LBB51_8: # %entry +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: .LBB51_9: # %entry ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret -; RV64-NEXT: .LBB51_9: # %entry -; RV64-NEXT: mv a3, a4 -; RV64-NEXT: bltu a0, a4, .LBB51_2 -; RV64-NEXT: .LBB51_10: # %entry -; 
RV64-NEXT: mv a0, a4 -; RV64-NEXT: bnez a1, .LBB51_3 -; RV64-NEXT: j .LBB51_4 -; RV64-NEXT: .LBB51_11: # %entry -; RV64-NEXT: mv a3, a4 -; RV64-NEXT: bltu a4, a0, .LBB51_6 -; RV64-NEXT: .LBB51_12: # %entry -; RV64-NEXT: mv a0, a4 -; RV64-NEXT: bne a1, a2, .LBB51_7 -; RV64-NEXT: j .LBB51_8 entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3765,15 +3585,14 @@ define i64 @utesth_f16i64_mm(half %x) { ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) ; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: snez a4, a4 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: seqz a4, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a1, a0, -1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: neg a1, a0 ; RV32-NEXT: and a0, a1, a3 -; RV32-NEXT: and a2, a4, a2 ; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 @@ -3788,11 +3607,7 @@ define i64 @utesth_f16i64_mm(half %x) { ; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixunssfti@plt -; RV64-NEXT: snez a2, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a0, a2, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: snez a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -3817,32 +3632,29 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lw a3, 12(sp) -; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a4, 16(sp) -; RV32-NEXT: slti a2, a1, 0 -; RV32-NEXT: beqz a1, .LBB53_2 +; RV32-NEXT: beqz a2, .LBB53_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: mv a5, a2 +; RV32-NEXT: slti a3, a2, 0 ; RV32-NEXT: j .LBB53_3 ; RV32-NEXT: .LBB53_2: -; RV32-NEXT: seqz a5, a4 +; RV32-NEXT: seqz a3, a4 ; RV32-NEXT: .LBB53_3: # %entry -; RV32-NEXT: neg a5, a5 -; RV32-NEXT: and a3, a5, a3 ; RV32-NEXT: xori a4, a4, 1 -; RV32-NEXT: or a4, a4, a1 +; RV32-NEXT: or a4, a4, a2 ; RV32-NEXT: seqz a4, a4 ; RV32-NEXT: addi a4, a4, -1 ; RV32-NEXT: and a3, a4, a3 -; RV32-NEXT: and a0, a5, a0 -; RV32-NEXT: and a0, a4, a0 -; RV32-NEXT: neg a2, a2 +; RV32-NEXT: neg a3, a3 +; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: and a0, a3, a0 +; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: slti a2, a2, 0 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: slti a1, a1, 0 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: and a1, a1, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -3861,12 +3673,8 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB53_2: # %entry -; RV64-NEXT: slti a3, a1, 1 -; RV64-NEXT: neg a3, a3 -; RV64-NEXT: and a0, a3, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: seqz a1, a1 -; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: slti a1, a1, 1 +; RV64-NEXT: neg a1, a1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: slti a1, a2, 0 ; RV64-NEXT: addi a1, a1, -1 diff --git a/llvm/test/CodeGen/RISCV/min-max.ll b/llvm/test/CodeGen/RISCV/min-max.ll index 2a88807..0115b48 100644 --- a/llvm/test/CodeGen/RISCV/min-max.ll +++ b/llvm/test/CodeGen/RISCV/min-max.ll @@ -70,27 +70,18 @@ declare i64 @llvm.smax.i64(i64 %a, i64 %b) readnone define i64 @smax_i64(i64 
%a, i64 %b) { ; RV32I-LABEL: smax_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a4, a0 -; RV32I-NEXT: bge a3, a1, .LBB3_5 +; RV32I-NEXT: beq a1, a3, .LBB3_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: bgeu a2, a0, .LBB3_6 +; RV32I-NEXT: slt a4, a3, a1 +; RV32I-NEXT: beqz a4, .LBB3_3 +; RV32I-NEXT: j .LBB3_4 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: bne a1, a3, .LBB3_7 +; RV32I-NEXT: sltu a4, a2, a0 +; RV32I-NEXT: bnez a4, .LBB3_4 ; RV32I-NEXT: .LBB3_3: -; RV32I-NEXT: bge a3, a1, .LBB3_8 -; RV32I-NEXT: .LBB3_4: -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB3_5: -; RV32I-NEXT: mv a4, a2 -; RV32I-NEXT: bltu a2, a0, .LBB3_2 -; RV32I-NEXT: .LBB3_6: ; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: beq a1, a3, .LBB3_3 -; RV32I-NEXT: .LBB3_7: -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: blt a3, a1, .LBB3_4 -; RV32I-NEXT: .LBB3_8: ; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB3_4: ; RV32I-NEXT: ret ; ; RV64I-LABEL: smax_i64: @@ -103,19 +94,18 @@ define i64 @smax_i64(i64 %a, i64 %b) { ; ; RV32ZBB-LABEL: smax_i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv a4, a0 -; RV32ZBB-NEXT: bge a3, a1, .LBB3_3 +; RV32ZBB-NEXT: beq a1, a3, .LBB3_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: beq a1, a3, .LBB3_4 +; RV32ZBB-NEXT: slt a4, a3, a1 +; RV32ZBB-NEXT: beqz a4, .LBB3_3 +; RV32ZBB-NEXT: j .LBB3_4 ; RV32ZBB-NEXT: .LBB3_2: -; RV32ZBB-NEXT: max a1, a1, a3 -; RV32ZBB-NEXT: ret +; RV32ZBB-NEXT: sltu a4, a2, a0 +; RV32ZBB-NEXT: bnez a4, .LBB3_4 ; RV32ZBB-NEXT: .LBB3_3: ; RV32ZBB-NEXT: mv a0, a2 -; RV32ZBB-NEXT: bne a1, a3, .LBB3_2 +; RV32ZBB-NEXT: mv a1, a3 ; RV32ZBB-NEXT: .LBB3_4: -; RV32ZBB-NEXT: maxu a0, a4, a2 -; RV32ZBB-NEXT: max a1, a1, a3 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: smax_i64: @@ -188,27 +178,18 @@ declare i64 @llvm.smin.i64(i64 %a, i64 %b) readnone define i64 @smin_i64(i64 %a, i64 %b) { ; RV32I-LABEL: smin_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a4, a0 -; RV32I-NEXT: bge a1, a3, .LBB7_5 +; RV32I-NEXT: beq a1, a3, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: bgeu a0, a2, .LBB7_6 +; RV32I-NEXT: slt a4, a1, a3 +; RV32I-NEXT: beqz a4, .LBB7_3 +; RV32I-NEXT: j .LBB7_4 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: bne a1, a3, .LBB7_7 +; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: bnez a4, .LBB7_4 ; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: bge a1, a3, .LBB7_8 -; RV32I-NEXT: .LBB7_4: -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB7_5: -; RV32I-NEXT: mv a4, a2 -; RV32I-NEXT: bltu a0, a2, .LBB7_2 -; RV32I-NEXT: .LBB7_6: ; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: beq a1, a3, .LBB7_3 -; RV32I-NEXT: .LBB7_7: -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: blt a1, a3, .LBB7_4 -; RV32I-NEXT: .LBB7_8: ; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB7_4: ; RV32I-NEXT: ret ; ; RV64I-LABEL: smin_i64: @@ -221,19 +202,18 @@ define i64 @smin_i64(i64 %a, i64 %b) { ; ; RV32ZBB-LABEL: smin_i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv a4, a0 -; RV32ZBB-NEXT: bge a1, a3, .LBB7_3 +; RV32ZBB-NEXT: beq a1, a3, .LBB7_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: beq a1, a3, .LBB7_4 +; RV32ZBB-NEXT: slt a4, a1, a3 +; RV32ZBB-NEXT: beqz a4, .LBB7_3 +; RV32ZBB-NEXT: j .LBB7_4 ; RV32ZBB-NEXT: .LBB7_2: -; RV32ZBB-NEXT: min a1, a1, a3 -; RV32ZBB-NEXT: ret +; RV32ZBB-NEXT: sltu a4, a0, a2 +; RV32ZBB-NEXT: bnez a4, .LBB7_4 ; RV32ZBB-NEXT: .LBB7_3: ; RV32ZBB-NEXT: mv a0, a2 -; RV32ZBB-NEXT: bne a1, a3, .LBB7_2 +; RV32ZBB-NEXT: mv a1, a3 ; RV32ZBB-NEXT: .LBB7_4: -; RV32ZBB-NEXT: minu a0, a4, a2 -; RV32ZBB-NEXT: min a1, a1, a3 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: smin_i64: @@ -306,27 +286,18 @@ declare i64 @llvm.umax.i64(i64 %a, i64 %b) readnone define i64 @umax_i64(i64 %a, i64 %b) { ; RV32I-LABEL: umax_i64: ; RV32I: # 
%bb.0: -; RV32I-NEXT: mv a4, a0 -; RV32I-NEXT: bgeu a3, a1, .LBB11_5 +; RV32I-NEXT: beq a1, a3, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: bgeu a2, a0, .LBB11_6 +; RV32I-NEXT: sltu a4, a3, a1 +; RV32I-NEXT: beqz a4, .LBB11_3 +; RV32I-NEXT: j .LBB11_4 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: bne a1, a3, .LBB11_7 +; RV32I-NEXT: sltu a4, a2, a0 +; RV32I-NEXT: bnez a4, .LBB11_4 ; RV32I-NEXT: .LBB11_3: -; RV32I-NEXT: bgeu a3, a1, .LBB11_8 -; RV32I-NEXT: .LBB11_4: -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: mv a4, a2 -; RV32I-NEXT: bltu a2, a0, .LBB11_2 -; RV32I-NEXT: .LBB11_6: ; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: beq a1, a3, .LBB11_3 -; RV32I-NEXT: .LBB11_7: -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: bltu a3, a1, .LBB11_4 -; RV32I-NEXT: .LBB11_8: ; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB11_4: ; RV32I-NEXT: ret ; ; RV64I-LABEL: umax_i64: @@ -339,19 +310,18 @@ define i64 @umax_i64(i64 %a, i64 %b) { ; ; RV32ZBB-LABEL: umax_i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv a4, a0 -; RV32ZBB-NEXT: bgeu a3, a1, .LBB11_3 +; RV32ZBB-NEXT: beq a1, a3, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: beq a1, a3, .LBB11_4 +; RV32ZBB-NEXT: sltu a4, a3, a1 +; RV32ZBB-NEXT: beqz a4, .LBB11_3 +; RV32ZBB-NEXT: j .LBB11_4 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: maxu a1, a1, a3 -; RV32ZBB-NEXT: ret +; RV32ZBB-NEXT: sltu a4, a2, a0 +; RV32ZBB-NEXT: bnez a4, .LBB11_4 ; RV32ZBB-NEXT: .LBB11_3: ; RV32ZBB-NEXT: mv a0, a2 -; RV32ZBB-NEXT: bne a1, a3, .LBB11_2 +; RV32ZBB-NEXT: mv a1, a3 ; RV32ZBB-NEXT: .LBB11_4: -; RV32ZBB-NEXT: maxu a0, a4, a2 -; RV32ZBB-NEXT: maxu a1, a1, a3 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: umax_i64: @@ -424,27 +394,18 @@ declare i64 @llvm.umin.i64(i64 %a, i64 %b) readnone define i64 @umin_i64(i64 %a, i64 %b) { ; RV32I-LABEL: umin_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a4, a0 -; RV32I-NEXT: bgeu a1, a3, .LBB15_5 +; RV32I-NEXT: beq a1, a3, .LBB15_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: bgeu a0, a2, .LBB15_6 +; RV32I-NEXT: sltu a4, a1, a3 +; RV32I-NEXT: beqz a4, .LBB15_3 +; RV32I-NEXT: j .LBB15_4 ; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: bne a1, a3, .LBB15_7 +; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: bnez a4, .LBB15_4 ; RV32I-NEXT: .LBB15_3: -; RV32I-NEXT: bgeu a1, a3, .LBB15_8 -; RV32I-NEXT: .LBB15_4: -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB15_5: -; RV32I-NEXT: mv a4, a2 -; RV32I-NEXT: bltu a0, a2, .LBB15_2 -; RV32I-NEXT: .LBB15_6: ; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: beq a1, a3, .LBB15_3 -; RV32I-NEXT: .LBB15_7: -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: bltu a1, a3, .LBB15_4 -; RV32I-NEXT: .LBB15_8: ; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB15_4: ; RV32I-NEXT: ret ; ; RV64I-LABEL: umin_i64: @@ -457,19 +418,18 @@ define i64 @umin_i64(i64 %a, i64 %b) { ; ; RV32ZBB-LABEL: umin_i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv a4, a0 -; RV32ZBB-NEXT: bgeu a1, a3, .LBB15_3 +; RV32ZBB-NEXT: beq a1, a3, .LBB15_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: beq a1, a3, .LBB15_4 +; RV32ZBB-NEXT: sltu a4, a1, a3 +; RV32ZBB-NEXT: beqz a4, .LBB15_3 +; RV32ZBB-NEXT: j .LBB15_4 ; RV32ZBB-NEXT: .LBB15_2: -; RV32ZBB-NEXT: minu a1, a1, a3 -; RV32ZBB-NEXT: ret +; RV32ZBB-NEXT: sltu a4, a0, a2 +; RV32ZBB-NEXT: bnez a4, .LBB15_4 ; RV32ZBB-NEXT: .LBB15_3: ; RV32ZBB-NEXT: mv a0, a2 -; RV32ZBB-NEXT: bne a1, a3, .LBB15_2 +; RV32ZBB-NEXT: mv a1, a3 ; RV32ZBB-NEXT: .LBB15_4: -; RV32ZBB-NEXT: minu a0, a4, a2 -; RV32ZBB-NEXT: minu a1, a1, a3 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: umin_i64: diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 0318cba..1dee75c 100644 --- 
a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -472,38 +472,21 @@ define i32 @min_i32(i32 %a, i32 %b) nounwind { ; extensions introduce instructions suitable for this pattern. define i64 @min_i64(i64 %a, i64 %b) nounwind { -; RV32I-LABEL: min_i64: -; RV32I: # %bb.0: -; RV32I-NEXT: beq a1, a3, .LBB11_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a4, a1, a3 -; RV32I-NEXT: beqz a4, .LBB11_3 -; RV32I-NEXT: j .LBB11_4 -; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sltu a4, a0, a2 -; RV32I-NEXT: bnez a4, .LBB11_4 -; RV32I-NEXT: .LBB11_3: -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: .LBB11_4: -; RV32I-NEXT: ret -; -; RV32ZBB-LABEL: min_i64: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv a4, a0 -; RV32ZBB-NEXT: bge a1, a3, .LBB11_3 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: beq a1, a3, .LBB11_4 -; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: min a1, a1, a3 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB11_3: -; RV32ZBB-NEXT: mv a0, a2 -; RV32ZBB-NEXT: bne a1, a3, .LBB11_2 -; RV32ZBB-NEXT: .LBB11_4: -; RV32ZBB-NEXT: minu a0, a4, a2 -; RV32ZBB-NEXT: min a1, a1, a3 -; RV32ZBB-NEXT: ret +; CHECK-LABEL: min_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: beq a1, a3, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: slt a4, a1, a3 +; CHECK-NEXT: beqz a4, .LBB11_3 +; CHECK-NEXT: j .LBB11_4 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: sltu a4, a0, a2 +; CHECK-NEXT: bnez a4, .LBB11_4 +; CHECK-NEXT: .LBB11_3: +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a1, a3 +; CHECK-NEXT: .LBB11_4: +; CHECK-NEXT: ret %cmp = icmp slt i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b ret i64 %cond @@ -533,38 +516,21 @@ define i32 @max_i32(i32 %a, i32 %b) nounwind { ; extensions introduce instructions suitable for this pattern. define i64 @max_i64(i64 %a, i64 %b) nounwind { -; RV32I-LABEL: max_i64: -; RV32I: # %bb.0: -; RV32I-NEXT: beq a1, a3, .LBB13_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a4, a3, a1 -; RV32I-NEXT: beqz a4, .LBB13_3 -; RV32I-NEXT: j .LBB13_4 -; RV32I-NEXT: .LBB13_2: -; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: bnez a4, .LBB13_4 -; RV32I-NEXT: .LBB13_3: -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: .LBB13_4: -; RV32I-NEXT: ret -; -; RV32ZBB-LABEL: max_i64: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv a4, a0 -; RV32ZBB-NEXT: bge a3, a1, .LBB13_3 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: beq a1, a3, .LBB13_4 -; RV32ZBB-NEXT: .LBB13_2: -; RV32ZBB-NEXT: max a1, a1, a3 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB13_3: -; RV32ZBB-NEXT: mv a0, a2 -; RV32ZBB-NEXT: bne a1, a3, .LBB13_2 -; RV32ZBB-NEXT: .LBB13_4: -; RV32ZBB-NEXT: maxu a0, a4, a2 -; RV32ZBB-NEXT: max a1, a1, a3 -; RV32ZBB-NEXT: ret +; CHECK-LABEL: max_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: beq a1, a3, .LBB13_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: slt a4, a3, a1 +; CHECK-NEXT: beqz a4, .LBB13_3 +; CHECK-NEXT: j .LBB13_4 +; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: sltu a4, a2, a0 +; CHECK-NEXT: bnez a4, .LBB13_4 +; CHECK-NEXT: .LBB13_3: +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a1, a3 +; CHECK-NEXT: .LBB13_4: +; CHECK-NEXT: ret %cmp = icmp sgt i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b ret i64 %cond @@ -594,38 +560,21 @@ define i32 @minu_i32(i32 %a, i32 %b) nounwind { ; extensions introduce instructions suitable for this pattern. 
define i64 @minu_i64(i64 %a, i64 %b) nounwind { -; RV32I-LABEL: minu_i64: -; RV32I: # %bb.0: -; RV32I-NEXT: beq a1, a3, .LBB15_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a4, a1, a3 -; RV32I-NEXT: beqz a4, .LBB15_3 -; RV32I-NEXT: j .LBB15_4 -; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: sltu a4, a0, a2 -; RV32I-NEXT: bnez a4, .LBB15_4 -; RV32I-NEXT: .LBB15_3: -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: .LBB15_4: -; RV32I-NEXT: ret -; -; RV32ZBB-LABEL: minu_i64: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv a4, a0 -; RV32ZBB-NEXT: bgeu a1, a3, .LBB15_3 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: beq a1, a3, .LBB15_4 -; RV32ZBB-NEXT: .LBB15_2: -; RV32ZBB-NEXT: minu a1, a1, a3 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB15_3: -; RV32ZBB-NEXT: mv a0, a2 -; RV32ZBB-NEXT: bne a1, a3, .LBB15_2 -; RV32ZBB-NEXT: .LBB15_4: -; RV32ZBB-NEXT: minu a0, a4, a2 -; RV32ZBB-NEXT: minu a1, a1, a3 -; RV32ZBB-NEXT: ret +; CHECK-LABEL: minu_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: beq a1, a3, .LBB15_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: beqz a4, .LBB15_3 +; CHECK-NEXT: j .LBB15_4 +; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: sltu a4, a0, a2 +; CHECK-NEXT: bnez a4, .LBB15_4 +; CHECK-NEXT: .LBB15_3: +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a1, a3 +; CHECK-NEXT: .LBB15_4: +; CHECK-NEXT: ret %cmp = icmp ult i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b ret i64 %cond @@ -655,38 +604,21 @@ define i32 @maxu_i32(i32 %a, i32 %b) nounwind { ; extensions introduce instructions suitable for this pattern. define i64 @maxu_i64(i64 %a, i64 %b) nounwind { -; RV32I-LABEL: maxu_i64: -; RV32I: # %bb.0: -; RV32I-NEXT: beq a1, a3, .LBB17_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a4, a3, a1 -; RV32I-NEXT: beqz a4, .LBB17_3 -; RV32I-NEXT: j .LBB17_4 -; RV32I-NEXT: .LBB17_2: -; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: bnez a4, .LBB17_4 -; RV32I-NEXT: .LBB17_3: -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: .LBB17_4: -; RV32I-NEXT: ret -; -; RV32ZBB-LABEL: maxu_i64: -; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv a4, a0 -; RV32ZBB-NEXT: bgeu a3, a1, .LBB17_3 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: beq a1, a3, .LBB17_4 -; RV32ZBB-NEXT: .LBB17_2: -; RV32ZBB-NEXT: maxu a1, a1, a3 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB17_3: -; RV32ZBB-NEXT: mv a0, a2 -; RV32ZBB-NEXT: bne a1, a3, .LBB17_2 -; RV32ZBB-NEXT: .LBB17_4: -; RV32ZBB-NEXT: maxu a0, a4, a2 -; RV32ZBB-NEXT: maxu a1, a1, a3 -; RV32ZBB-NEXT: ret +; CHECK-LABEL: maxu_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: beq a1, a3, .LBB17_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: sltu a4, a3, a1 +; CHECK-NEXT: beqz a4, .LBB17_3 +; CHECK-NEXT: j .LBB17_4 +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: sltu a4, a2, a0 +; CHECK-NEXT: bnez a4, .LBB17_4 +; CHECK-NEXT: .LBB17_3: +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a1, a3 +; CHECK-NEXT: .LBB17_4: +; CHECK-NEXT: ret %cmp = icmp ugt i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b ret i64 %cond diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index 2ae3dad..daa2e91 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -5378,43 +5378,58 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: mv a2, a0 ; CHECK-NOV-NEXT: li a0, -1 ; CHECK-NOV-NEXT: srli a3, a0, 1 -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB45_15 +; CHECK-NOV-NEXT: beqz a1, .LBB45_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: bgeu a2, a3, .LBB45_16 -; CHECK-NOV-NEXT: 
.LBB45_2: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB45_17 +; CHECK-NOV-NEXT: slti a4, a1, 0 +; CHECK-NOV-NEXT: beqz a4, .LBB45_3 +; CHECK-NOV-NEXT: j .LBB45_4 +; CHECK-NOV-NEXT: .LBB45_2: +; CHECK-NOV-NEXT: sltu a4, a2, a3 +; CHECK-NOV-NEXT: bnez a4, .LBB45_4 ; CHECK-NOV-NEXT: .LBB45_3: # %entry -; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bgez s1, .LBB45_18 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: .LBB45_4: # %entry -; CHECK-NOV-NEXT: bgeu s0, a3, .LBB45_19 -; CHECK-NOV-NEXT: .LBB45_5: # %entry -; CHECK-NOV-NEXT: beqz s1, .LBB45_7 -; CHECK-NOV-NEXT: .LBB45_6: # %entry -; CHECK-NOV-NEXT: mv s0, a4 +; CHECK-NOV-NEXT: beqz s1, .LBB45_6 +; CHECK-NOV-NEXT: # %bb.5: # %entry +; CHECK-NOV-NEXT: slti a6, s1, 0 +; CHECK-NOV-NEXT: j .LBB45_7 +; CHECK-NOV-NEXT: .LBB45_6: +; CHECK-NOV-NEXT: sltu a6, s0, a3 ; CHECK-NOV-NEXT: .LBB45_7: # %entry -; CHECK-NOV-NEXT: srai a4, s1, 63 -; CHECK-NOV-NEXT: and a4, a4, s1 -; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: mv a5, s0 -; CHECK-NOV-NEXT: bltz a4, .LBB45_20 +; CHECK-NOV-NEXT: neg a5, a6 +; CHECK-NOV-NEXT: and a5, a5, s1 +; CHECK-NOV-NEXT: bnez a6, .LBB45_9 ; CHECK-NOV-NEXT: # %bb.8: # %entry -; CHECK-NOV-NEXT: srai a6, a1, 63 -; CHECK-NOV-NEXT: bgeu a3, s0, .LBB45_21 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB45_9: # %entry -; CHECK-NOV-NEXT: and a1, a6, a1 -; CHECK-NOV-NEXT: bne a4, a0, .LBB45_22 -; CHECK-NOV-NEXT: .LBB45_10: # %entry -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bltz a1, .LBB45_23 -; CHECK-NOV-NEXT: .LBB45_11: # %entry -; CHECK-NOV-NEXT: bgeu a3, a2, .LBB45_24 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: slli a3, a0, 63 +; CHECK-NOV-NEXT: beq a5, a0, .LBB45_11 +; CHECK-NOV-NEXT: # %bb.10: # %entry +; CHECK-NOV-NEXT: slti a5, a5, 0 +; CHECK-NOV-NEXT: xori a5, a5, 1 +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: beqz a5, .LBB45_12 +; CHECK-NOV-NEXT: j .LBB45_13 +; CHECK-NOV-NEXT: .LBB45_11: +; CHECK-NOV-NEXT: sltu a5, a3, s0 +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: bnez a5, .LBB45_13 ; CHECK-NOV-NEXT: .LBB45_12: # %entry -; CHECK-NOV-NEXT: beq a1, a0, .LBB45_14 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB45_13: # %entry -; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: .LBB45_14: # %entry +; CHECK-NOV-NEXT: beq a1, a0, .LBB45_15 +; CHECK-NOV-NEXT: # %bb.14: # %entry +; CHECK-NOV-NEXT: slti a0, a1, 0 +; CHECK-NOV-NEXT: xori a0, a0, 1 +; CHECK-NOV-NEXT: beqz a0, .LBB45_16 +; CHECK-NOV-NEXT: j .LBB45_17 +; CHECK-NOV-NEXT: .LBB45_15: +; CHECK-NOV-NEXT: sltu a0, a3, a2 +; CHECK-NOV-NEXT: bnez a0, .LBB45_17 +; CHECK-NOV-NEXT: .LBB45_16: # %entry +; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: .LBB45_17: # %entry ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -5423,42 +5438,6 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB45_15: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a2, a3, .LBB45_2 -; CHECK-NOV-NEXT: .LBB45_16: # %entry -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: beqz a1, .LBB45_3 -; CHECK-NOV-NEXT: .LBB45_17: # %entry -; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bltz s1, .LBB45_4 -; CHECK-NOV-NEXT: .LBB45_18: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu s0, a3, .LBB45_5 -; CHECK-NOV-NEXT: .LBB45_19: # %entry -; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: bnez s1, .LBB45_6 -; CHECK-NOV-NEXT: j .LBB45_7 -; 
CHECK-NOV-NEXT: .LBB45_20: # %entry -; CHECK-NOV-NEXT: mv a5, a3 -; CHECK-NOV-NEXT: srai a6, a1, 63 -; CHECK-NOV-NEXT: bltu a3, s0, .LBB45_9 -; CHECK-NOV-NEXT: .LBB45_21: # %entry -; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: and a1, a6, a1 -; CHECK-NOV-NEXT: beq a4, a0, .LBB45_10 -; CHECK-NOV-NEXT: .LBB45_22: # %entry -; CHECK-NOV-NEXT: mv s0, a5 -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB45_11 -; CHECK-NOV-NEXT: .LBB45_23: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a3, a2, .LBB45_12 -; CHECK-NOV-NEXT: .LBB45_24: # %entry -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: bne a1, a0, .LBB45_13 -; CHECK-NOV-NEXT: j .LBB45_14 ; ; CHECK-V-LABEL: stest_f64i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -5489,43 +5468,59 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: mv a4, s0 -; CHECK-V-NEXT: bgez s1, .LBB45_15 +; CHECK-V-NEXT: beqz s1, .LBB45_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: bgeu s0, a3, .LBB45_16 -; CHECK-V-NEXT: .LBB45_2: # %entry -; CHECK-V-NEXT: bnez s1, .LBB45_17 +; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: beqz a4, .LBB45_3 +; CHECK-V-NEXT: j .LBB45_4 +; CHECK-V-NEXT: .LBB45_2: +; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: bnez a4, .LBB45_4 ; CHECK-V-NEXT: .LBB45_3: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB45_18 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB45_4: # %entry -; CHECK-V-NEXT: bgeu a0, a3, .LBB45_19 -; CHECK-V-NEXT: .LBB45_5: # %entry -; CHECK-V-NEXT: beqz a1, .LBB45_7 -; CHECK-V-NEXT: .LBB45_6: # %entry -; CHECK-V-NEXT: mv a0, a4 +; CHECK-V-NEXT: neg a5, a4 +; CHECK-V-NEXT: beqz a1, .LBB45_6 +; CHECK-V-NEXT: # %bb.5: # %entry +; CHECK-V-NEXT: slti a4, a1, 0 +; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: beqz a4, .LBB45_7 +; CHECK-V-NEXT: j .LBB45_8 +; CHECK-V-NEXT: .LBB45_6: +; CHECK-V-NEXT: sltu a4, a0, a3 +; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: bnez a4, .LBB45_8 ; CHECK-V-NEXT: .LBB45_7: # %entry -; CHECK-V-NEXT: srai a4, s1, 63 -; CHECK-V-NEXT: and a4, a4, s1 +; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: .LBB45_8: # %entry +; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: mv a5, s0 -; CHECK-V-NEXT: bltz a4, .LBB45_20 -; CHECK-V-NEXT: # %bb.8: # %entry -; CHECK-V-NEXT: srai a6, a1, 63 -; CHECK-V-NEXT: bgeu a3, s0, .LBB45_21 -; CHECK-V-NEXT: .LBB45_9: # %entry -; CHECK-V-NEXT: and a1, a6, a1 -; CHECK-V-NEXT: bne a4, a2, .LBB45_22 -; CHECK-V-NEXT: .LBB45_10: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bltz a1, .LBB45_23 +; CHECK-V-NEXT: beq a5, a2, .LBB45_10 +; CHECK-V-NEXT: # %bb.9: # %entry +; CHECK-V-NEXT: slti a5, a5, 0 +; CHECK-V-NEXT: xori a5, a5, 1 +; CHECK-V-NEXT: and a1, a4, a1 +; CHECK-V-NEXT: beqz a5, .LBB45_11 +; CHECK-V-NEXT: j .LBB45_12 +; CHECK-V-NEXT: .LBB45_10: +; CHECK-V-NEXT: sltu a5, a3, s0 +; CHECK-V-NEXT: and a1, a4, a1 +; CHECK-V-NEXT: bnez a5, .LBB45_12 ; CHECK-V-NEXT: .LBB45_11: # %entry -; CHECK-V-NEXT: bgeu a3, a0, .LBB45_24 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB45_12: # %entry ; CHECK-V-NEXT: beq a1, a2, .LBB45_14 -; CHECK-V-NEXT: .LBB45_13: # %entry -; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: .LBB45_14: # %entry +; CHECK-V-NEXT: # %bb.13: # %entry +; CHECK-V-NEXT: slti a1, a1, 0 +; CHECK-V-NEXT: xori a1, a1, 1 +; CHECK-V-NEXT: beqz a1, .LBB45_15 +; CHECK-V-NEXT: j .LBB45_16 +; CHECK-V-NEXT: .LBB45_14: +; CHECK-V-NEXT: sltu a1, a3, a0 +; CHECK-V-NEXT: bnez a1, .LBB45_16 +; CHECK-V-NEXT: 
.LBB45_15: # %entry +; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: .LBB45_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 @@ -5538,42 +5533,6 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 64 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB45_15: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu s0, a3, .LBB45_2 -; CHECK-V-NEXT: .LBB45_16: # %entry -; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: beqz s1, .LBB45_3 -; CHECK-V-NEXT: .LBB45_17: # %entry -; CHECK-V-NEXT: mv s0, a4 -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bltz a1, .LBB45_4 -; CHECK-V-NEXT: .LBB45_18: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a0, a3, .LBB45_5 -; CHECK-V-NEXT: .LBB45_19: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: bnez a1, .LBB45_6 -; CHECK-V-NEXT: j .LBB45_7 -; CHECK-V-NEXT: .LBB45_20: # %entry -; CHECK-V-NEXT: mv a5, a3 -; CHECK-V-NEXT: srai a6, a1, 63 -; CHECK-V-NEXT: bltu a3, s0, .LBB45_9 -; CHECK-V-NEXT: .LBB45_21: # %entry -; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: and a1, a6, a1 -; CHECK-V-NEXT: beq a4, a2, .LBB45_10 -; CHECK-V-NEXT: .LBB45_22: # %entry -; CHECK-V-NEXT: mv s0, a5 -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB45_11 -; CHECK-V-NEXT: .LBB45_23: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a3, a0, .LBB45_12 -; CHECK-V-NEXT: .LBB45_24: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: bne a1, a2, .LBB45_13 -; CHECK-V-NEXT: j .LBB45_14 entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -5602,20 +5561,12 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixunsdfti@plt -; CHECK-NOV-NEXT: snez a2, a1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a0, a1, a0 ; CHECK-NOV-NEXT: snez a1, s1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a1, a1, s0 -; CHECK-NOV-NEXT: addi s1, s1, -1 -; CHECK-NOV-NEXT: seqz a2, s1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -5653,15 +5604,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 -; CHECK-V-NEXT: addi s1, s1, -1 -; CHECK-V-NEXT: seqz a3, s1 -; CHECK-V-NEXT: addi a3, a3, -1 -; CHECK-V-NEXT: and a2, a3, a2 -; CHECK-V-NEXT: snez a3, a1 -; CHECK-V-NEXT: addi a3, a3, -1 -; CHECK-V-NEXT: and a0, a3, a0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -5712,20 +5655,12 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB47_4: # %entry -; CHECK-NOV-NEXT: slti a4, a1, 1 -; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: and a0, a4, a0 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: seqz a1, a1 -; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: slti a1, a1, 1 +; 
CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a0 ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 -; CHECK-NOV-NEXT: addi s1, s1, -1 -; CHECK-NOV-NEXT: seqz a4, s1 -; CHECK-NOV-NEXT: addi a4, a4, -1 -; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: slti a3, a3, 0 ; CHECK-NOV-NEXT: addi a3, a3, -1 ; CHECK-NOV-NEXT: and a0, a3, a0 @@ -5774,16 +5709,8 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: slti a3, s0, 1 ; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: and a3, a3, s1 -; CHECK-V-NEXT: addi a4, s0, -1 -; CHECK-V-NEXT: seqz a4, a4 -; CHECK-V-NEXT: addi a4, a4, -1 -; CHECK-V-NEXT: and a3, a4, a3 -; CHECK-V-NEXT: slti a4, a1, 1 -; CHECK-V-NEXT: neg a4, a4 -; CHECK-V-NEXT: and a0, a4, a0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: seqz a1, a1 -; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: slti a1, a1, 1 +; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: blez s0, .LBB47_4 ; CHECK-V-NEXT: # %bb.3: # %entry @@ -5837,43 +5764,58 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NOV-NEXT: mv a2, a0 ; CHECK-NOV-NEXT: li a0, -1 ; CHECK-NOV-NEXT: srli a3, a0, 1 -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB48_15 +; CHECK-NOV-NEXT: beqz a1, .LBB48_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: bgeu a2, a3, .LBB48_16 -; CHECK-NOV-NEXT: .LBB48_2: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB48_17 +; CHECK-NOV-NEXT: slti a4, a1, 0 +; CHECK-NOV-NEXT: beqz a4, .LBB48_3 +; CHECK-NOV-NEXT: j .LBB48_4 +; CHECK-NOV-NEXT: .LBB48_2: +; CHECK-NOV-NEXT: sltu a4, a2, a3 +; CHECK-NOV-NEXT: bnez a4, .LBB48_4 ; CHECK-NOV-NEXT: .LBB48_3: # %entry -; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bgez s1, .LBB48_18 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: .LBB48_4: # %entry -; CHECK-NOV-NEXT: bgeu s0, a3, .LBB48_19 -; CHECK-NOV-NEXT: .LBB48_5: # %entry -; CHECK-NOV-NEXT: beqz s1, .LBB48_7 -; CHECK-NOV-NEXT: .LBB48_6: # %entry -; CHECK-NOV-NEXT: mv s0, a4 +; CHECK-NOV-NEXT: beqz s1, .LBB48_6 +; CHECK-NOV-NEXT: # %bb.5: # %entry +; CHECK-NOV-NEXT: slti a6, s1, 0 +; CHECK-NOV-NEXT: j .LBB48_7 +; CHECK-NOV-NEXT: .LBB48_6: +; CHECK-NOV-NEXT: sltu a6, s0, a3 ; CHECK-NOV-NEXT: .LBB48_7: # %entry -; CHECK-NOV-NEXT: srai a4, s1, 63 -; CHECK-NOV-NEXT: and a4, a4, s1 -; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: mv a5, s0 -; CHECK-NOV-NEXT: bltz a4, .LBB48_20 +; CHECK-NOV-NEXT: neg a5, a6 +; CHECK-NOV-NEXT: and a5, a5, s1 +; CHECK-NOV-NEXT: bnez a6, .LBB48_9 ; CHECK-NOV-NEXT: # %bb.8: # %entry -; CHECK-NOV-NEXT: srai a6, a1, 63 -; CHECK-NOV-NEXT: bgeu a3, s0, .LBB48_21 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB48_9: # %entry -; CHECK-NOV-NEXT: and a1, a6, a1 -; CHECK-NOV-NEXT: bne a4, a0, .LBB48_22 -; CHECK-NOV-NEXT: .LBB48_10: # %entry -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bltz a1, .LBB48_23 -; CHECK-NOV-NEXT: .LBB48_11: # %entry -; CHECK-NOV-NEXT: bgeu a3, a2, .LBB48_24 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: slli a3, a0, 63 +; CHECK-NOV-NEXT: beq a5, a0, .LBB48_11 +; CHECK-NOV-NEXT: # %bb.10: # %entry +; CHECK-NOV-NEXT: slti a5, a5, 0 +; CHECK-NOV-NEXT: xori a5, a5, 1 +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: beqz a5, .LBB48_12 +; CHECK-NOV-NEXT: j .LBB48_13 +; CHECK-NOV-NEXT: .LBB48_11: +; CHECK-NOV-NEXT: sltu a5, a3, s0 +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: bnez a5, .LBB48_13 ; CHECK-NOV-NEXT: .LBB48_12: # %entry -; CHECK-NOV-NEXT: beq a1, a0, .LBB48_14 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB48_13: # 
%entry -; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: .LBB48_14: # %entry +; CHECK-NOV-NEXT: beq a1, a0, .LBB48_15 +; CHECK-NOV-NEXT: # %bb.14: # %entry +; CHECK-NOV-NEXT: slti a0, a1, 0 +; CHECK-NOV-NEXT: xori a0, a0, 1 +; CHECK-NOV-NEXT: beqz a0, .LBB48_16 +; CHECK-NOV-NEXT: j .LBB48_17 +; CHECK-NOV-NEXT: .LBB48_15: +; CHECK-NOV-NEXT: sltu a0, a3, a2 +; CHECK-NOV-NEXT: bnez a0, .LBB48_17 +; CHECK-NOV-NEXT: .LBB48_16: # %entry +; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: .LBB48_17: # %entry ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -5882,42 +5824,6 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NOV-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB48_15: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a2, a3, .LBB48_2 -; CHECK-NOV-NEXT: .LBB48_16: # %entry -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: beqz a1, .LBB48_3 -; CHECK-NOV-NEXT: .LBB48_17: # %entry -; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bltz s1, .LBB48_4 -; CHECK-NOV-NEXT: .LBB48_18: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu s0, a3, .LBB48_5 -; CHECK-NOV-NEXT: .LBB48_19: # %entry -; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: bnez s1, .LBB48_6 -; CHECK-NOV-NEXT: j .LBB48_7 -; CHECK-NOV-NEXT: .LBB48_20: # %entry -; CHECK-NOV-NEXT: mv a5, a3 -; CHECK-NOV-NEXT: srai a6, a1, 63 -; CHECK-NOV-NEXT: bltu a3, s0, .LBB48_9 -; CHECK-NOV-NEXT: .LBB48_21: # %entry -; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: and a1, a6, a1 -; CHECK-NOV-NEXT: beq a4, a0, .LBB48_10 -; CHECK-NOV-NEXT: .LBB48_22: # %entry -; CHECK-NOV-NEXT: mv s0, a5 -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB48_11 -; CHECK-NOV-NEXT: .LBB48_23: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a3, a2, .LBB48_12 -; CHECK-NOV-NEXT: .LBB48_24: # %entry -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: bne a1, a0, .LBB48_13 -; CHECK-NOV-NEXT: j .LBB48_14 ; ; CHECK-V-LABEL: stest_f32i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -5948,43 +5854,59 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: mv a4, s0 -; CHECK-V-NEXT: bgez s1, .LBB48_15 +; CHECK-V-NEXT: beqz s1, .LBB48_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: bgeu s0, a3, .LBB48_16 -; CHECK-V-NEXT: .LBB48_2: # %entry -; CHECK-V-NEXT: bnez s1, .LBB48_17 +; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: beqz a4, .LBB48_3 +; CHECK-V-NEXT: j .LBB48_4 +; CHECK-V-NEXT: .LBB48_2: +; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: bnez a4, .LBB48_4 ; CHECK-V-NEXT: .LBB48_3: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB48_18 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB48_4: # %entry -; CHECK-V-NEXT: bgeu a0, a3, .LBB48_19 -; CHECK-V-NEXT: .LBB48_5: # %entry -; CHECK-V-NEXT: beqz a1, .LBB48_7 -; CHECK-V-NEXT: .LBB48_6: # %entry -; CHECK-V-NEXT: mv a0, a4 +; CHECK-V-NEXT: neg a5, a4 +; CHECK-V-NEXT: beqz a1, .LBB48_6 +; CHECK-V-NEXT: # %bb.5: # %entry +; CHECK-V-NEXT: slti a4, a1, 0 +; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: beqz a4, .LBB48_7 +; CHECK-V-NEXT: j .LBB48_8 +; CHECK-V-NEXT: .LBB48_6: +; CHECK-V-NEXT: sltu a4, a0, a3 +; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: bnez a4, .LBB48_8 ; CHECK-V-NEXT: .LBB48_7: # %entry -; CHECK-V-NEXT: srai a4, s1, 63 -; CHECK-V-NEXT: and a4, a4, s1 +; CHECK-V-NEXT: mv a0, a3 +; 
CHECK-V-NEXT: .LBB48_8: # %entry +; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: mv a5, s0 -; CHECK-V-NEXT: bltz a4, .LBB48_20 -; CHECK-V-NEXT: # %bb.8: # %entry -; CHECK-V-NEXT: srai a6, a1, 63 -; CHECK-V-NEXT: bgeu a3, s0, .LBB48_21 -; CHECK-V-NEXT: .LBB48_9: # %entry -; CHECK-V-NEXT: and a1, a6, a1 -; CHECK-V-NEXT: bne a4, a2, .LBB48_22 -; CHECK-V-NEXT: .LBB48_10: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bltz a1, .LBB48_23 +; CHECK-V-NEXT: beq a5, a2, .LBB48_10 +; CHECK-V-NEXT: # %bb.9: # %entry +; CHECK-V-NEXT: slti a5, a5, 0 +; CHECK-V-NEXT: xori a5, a5, 1 +; CHECK-V-NEXT: and a1, a4, a1 +; CHECK-V-NEXT: beqz a5, .LBB48_11 +; CHECK-V-NEXT: j .LBB48_12 +; CHECK-V-NEXT: .LBB48_10: +; CHECK-V-NEXT: sltu a5, a3, s0 +; CHECK-V-NEXT: and a1, a4, a1 +; CHECK-V-NEXT: bnez a5, .LBB48_12 ; CHECK-V-NEXT: .LBB48_11: # %entry -; CHECK-V-NEXT: bgeu a3, a0, .LBB48_24 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB48_12: # %entry ; CHECK-V-NEXT: beq a1, a2, .LBB48_14 -; CHECK-V-NEXT: .LBB48_13: # %entry -; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: .LBB48_14: # %entry +; CHECK-V-NEXT: # %bb.13: # %entry +; CHECK-V-NEXT: slti a1, a1, 0 +; CHECK-V-NEXT: xori a1, a1, 1 +; CHECK-V-NEXT: beqz a1, .LBB48_15 +; CHECK-V-NEXT: j .LBB48_16 +; CHECK-V-NEXT: .LBB48_14: +; CHECK-V-NEXT: sltu a1, a3, a0 +; CHECK-V-NEXT: bnez a1, .LBB48_16 +; CHECK-V-NEXT: .LBB48_15: # %entry +; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: .LBB48_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 @@ -5997,42 +5919,6 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 64 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB48_15: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu s0, a3, .LBB48_2 -; CHECK-V-NEXT: .LBB48_16: # %entry -; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: beqz s1, .LBB48_3 -; CHECK-V-NEXT: .LBB48_17: # %entry -; CHECK-V-NEXT: mv s0, a4 -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bltz a1, .LBB48_4 -; CHECK-V-NEXT: .LBB48_18: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a0, a3, .LBB48_5 -; CHECK-V-NEXT: .LBB48_19: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: bnez a1, .LBB48_6 -; CHECK-V-NEXT: j .LBB48_7 -; CHECK-V-NEXT: .LBB48_20: # %entry -; CHECK-V-NEXT: mv a5, a3 -; CHECK-V-NEXT: srai a6, a1, 63 -; CHECK-V-NEXT: bltu a3, s0, .LBB48_9 -; CHECK-V-NEXT: .LBB48_21: # %entry -; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: and a1, a6, a1 -; CHECK-V-NEXT: beq a4, a2, .LBB48_10 -; CHECK-V-NEXT: .LBB48_22: # %entry -; CHECK-V-NEXT: mv s0, a5 -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB48_11 -; CHECK-V-NEXT: .LBB48_23: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a3, a0, .LBB48_12 -; CHECK-V-NEXT: .LBB48_24: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: bne a1, a2, .LBB48_13 -; CHECK-V-NEXT: j .LBB48_14 entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -6061,20 +5947,12 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: snez a2, a1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a0, a1, a0 ; 
CHECK-NOV-NEXT: snez a1, s1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a1, a1, s0 -; CHECK-NOV-NEXT: addi s1, s1, -1 -; CHECK-NOV-NEXT: seqz a2, s1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -6112,15 +5990,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 -; CHECK-V-NEXT: addi s1, s1, -1 -; CHECK-V-NEXT: seqz a3, s1 -; CHECK-V-NEXT: addi a3, a3, -1 -; CHECK-V-NEXT: and a2, a3, a2 -; CHECK-V-NEXT: snez a3, a1 -; CHECK-V-NEXT: addi a3, a3, -1 -; CHECK-V-NEXT: and a0, a3, a0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -6171,20 +6041,12 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB50_4: # %entry -; CHECK-NOV-NEXT: slti a4, a1, 1 -; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: and a0, a4, a0 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: seqz a1, a1 -; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: slti a1, a1, 1 +; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a0 ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 -; CHECK-NOV-NEXT: addi s1, s1, -1 -; CHECK-NOV-NEXT: seqz a4, s1 -; CHECK-NOV-NEXT: addi a4, a4, -1 -; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: slti a3, a3, 0 ; CHECK-NOV-NEXT: addi a3, a3, -1 ; CHECK-NOV-NEXT: and a0, a3, a0 @@ -6233,16 +6095,8 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: slti a3, s0, 1 ; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: and a3, a3, s1 -; CHECK-V-NEXT: addi a4, s0, -1 -; CHECK-V-NEXT: seqz a4, a4 -; CHECK-V-NEXT: addi a4, a4, -1 -; CHECK-V-NEXT: and a3, a4, a3 -; CHECK-V-NEXT: slti a4, a1, 1 -; CHECK-V-NEXT: neg a4, a4 -; CHECK-V-NEXT: and a0, a4, a0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: seqz a1, a1 -; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: slti a1, a1, 1 +; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: blez s0, .LBB50_4 ; CHECK-V-NEXT: # %bb.3: # %entry @@ -6298,43 +6152,58 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: mv a2, a0 ; CHECK-NOV-NEXT: li a0, -1 ; CHECK-NOV-NEXT: srli a3, a0, 1 -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB51_15 +; CHECK-NOV-NEXT: beqz a1, .LBB51_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: bgeu a2, a3, .LBB51_16 -; CHECK-NOV-NEXT: .LBB51_2: # %entry -; CHECK-NOV-NEXT: bnez a1, .LBB51_17 +; CHECK-NOV-NEXT: slti a4, a1, 0 +; CHECK-NOV-NEXT: beqz a4, .LBB51_3 +; CHECK-NOV-NEXT: j .LBB51_4 +; CHECK-NOV-NEXT: .LBB51_2: +; CHECK-NOV-NEXT: sltu a4, a2, a3 +; CHECK-NOV-NEXT: bnez a4, .LBB51_4 ; CHECK-NOV-NEXT: .LBB51_3: # %entry -; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bgez s1, .LBB51_18 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: .LBB51_4: # %entry -; CHECK-NOV-NEXT: bgeu s0, a3, .LBB51_19 -; CHECK-NOV-NEXT: .LBB51_5: # %entry -; CHECK-NOV-NEXT: beqz s1, .LBB51_7 -; CHECK-NOV-NEXT: .LBB51_6: # %entry -; CHECK-NOV-NEXT: mv s0, a4 +; CHECK-NOV-NEXT: beqz s1, .LBB51_6 +; CHECK-NOV-NEXT: # %bb.5: # %entry +; CHECK-NOV-NEXT: slti a6, s1, 0 +; CHECK-NOV-NEXT: j .LBB51_7 +; 
CHECK-NOV-NEXT: .LBB51_6: +; CHECK-NOV-NEXT: sltu a6, s0, a3 ; CHECK-NOV-NEXT: .LBB51_7: # %entry -; CHECK-NOV-NEXT: srai a4, s1, 63 -; CHECK-NOV-NEXT: and a4, a4, s1 -; CHECK-NOV-NEXT: slli a3, a0, 63 -; CHECK-NOV-NEXT: mv a5, s0 -; CHECK-NOV-NEXT: bltz a4, .LBB51_20 +; CHECK-NOV-NEXT: neg a5, a6 +; CHECK-NOV-NEXT: and a5, a5, s1 +; CHECK-NOV-NEXT: bnez a6, .LBB51_9 ; CHECK-NOV-NEXT: # %bb.8: # %entry -; CHECK-NOV-NEXT: srai a6, a1, 63 -; CHECK-NOV-NEXT: bgeu a3, s0, .LBB51_21 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB51_9: # %entry -; CHECK-NOV-NEXT: and a1, a6, a1 -; CHECK-NOV-NEXT: bne a4, a0, .LBB51_22 -; CHECK-NOV-NEXT: .LBB51_10: # %entry -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bltz a1, .LBB51_23 -; CHECK-NOV-NEXT: .LBB51_11: # %entry -; CHECK-NOV-NEXT: bgeu a3, a2, .LBB51_24 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: slli a3, a0, 63 +; CHECK-NOV-NEXT: beq a5, a0, .LBB51_11 +; CHECK-NOV-NEXT: # %bb.10: # %entry +; CHECK-NOV-NEXT: slti a5, a5, 0 +; CHECK-NOV-NEXT: xori a5, a5, 1 +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: beqz a5, .LBB51_12 +; CHECK-NOV-NEXT: j .LBB51_13 +; CHECK-NOV-NEXT: .LBB51_11: +; CHECK-NOV-NEXT: sltu a5, a3, s0 +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: bnez a5, .LBB51_13 ; CHECK-NOV-NEXT: .LBB51_12: # %entry -; CHECK-NOV-NEXT: beq a1, a0, .LBB51_14 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB51_13: # %entry -; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: .LBB51_14: # %entry +; CHECK-NOV-NEXT: beq a1, a0, .LBB51_15 +; CHECK-NOV-NEXT: # %bb.14: # %entry +; CHECK-NOV-NEXT: slti a0, a1, 0 +; CHECK-NOV-NEXT: xori a0, a0, 1 +; CHECK-NOV-NEXT: beqz a0, .LBB51_16 +; CHECK-NOV-NEXT: j .LBB51_17 +; CHECK-NOV-NEXT: .LBB51_15: +; CHECK-NOV-NEXT: sltu a0, a3, a2 +; CHECK-NOV-NEXT: bnez a0, .LBB51_17 +; CHECK-NOV-NEXT: .LBB51_16: # %entry +; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: .LBB51_17: # %entry ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -6343,42 +6212,6 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: addi sp, sp, 32 ; CHECK-NOV-NEXT: ret -; CHECK-NOV-NEXT: .LBB51_15: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a2, a3, .LBB51_2 -; CHECK-NOV-NEXT: .LBB51_16: # %entry -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: beqz a1, .LBB51_3 -; CHECK-NOV-NEXT: .LBB51_17: # %entry -; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: mv a4, s0 -; CHECK-NOV-NEXT: bltz s1, .LBB51_4 -; CHECK-NOV-NEXT: .LBB51_18: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu s0, a3, .LBB51_5 -; CHECK-NOV-NEXT: .LBB51_19: # %entry -; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: bnez s1, .LBB51_6 -; CHECK-NOV-NEXT: j .LBB51_7 -; CHECK-NOV-NEXT: .LBB51_20: # %entry -; CHECK-NOV-NEXT: mv a5, a3 -; CHECK-NOV-NEXT: srai a6, a1, 63 -; CHECK-NOV-NEXT: bltu a3, s0, .LBB51_9 -; CHECK-NOV-NEXT: .LBB51_21: # %entry -; CHECK-NOV-NEXT: mv s0, a3 -; CHECK-NOV-NEXT: and a1, a6, a1 -; CHECK-NOV-NEXT: beq a4, a0, .LBB51_10 -; CHECK-NOV-NEXT: .LBB51_22: # %entry -; CHECK-NOV-NEXT: mv s0, a5 -; CHECK-NOV-NEXT: mv a4, a2 -; CHECK-NOV-NEXT: bgez a1, .LBB51_11 -; CHECK-NOV-NEXT: .LBB51_23: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bltu a3, a2, .LBB51_12 -; CHECK-NOV-NEXT: .LBB51_24: # %entry -; CHECK-NOV-NEXT: mv a2, a3 -; CHECK-NOV-NEXT: bne a1, a0, .LBB51_13 -; CHECK-NOV-NEXT: j .LBB51_14 ; ; CHECK-V-LABEL: stest_f16i64_mm: ; CHECK-V: # %bb.0: # %entry @@ -6402,43 +6235,58 @@ 
define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: call __fixsfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB51_15 +; CHECK-V-NEXT: beqz a1, .LBB51_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: bgeu a0, a3, .LBB51_16 -; CHECK-V-NEXT: .LBB51_2: # %entry -; CHECK-V-NEXT: bnez a1, .LBB51_17 +; CHECK-V-NEXT: slti a4, a1, 0 +; CHECK-V-NEXT: beqz a4, .LBB51_3 +; CHECK-V-NEXT: j .LBB51_4 +; CHECK-V-NEXT: .LBB51_2: +; CHECK-V-NEXT: sltu a4, a0, a3 +; CHECK-V-NEXT: bnez a4, .LBB51_4 ; CHECK-V-NEXT: .LBB51_3: # %entry -; CHECK-V-NEXT: mv a4, s0 -; CHECK-V-NEXT: bgez s1, .LBB51_18 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB51_4: # %entry -; CHECK-V-NEXT: bgeu s0, a3, .LBB51_19 -; CHECK-V-NEXT: .LBB51_5: # %entry -; CHECK-V-NEXT: beqz s1, .LBB51_7 -; CHECK-V-NEXT: .LBB51_6: # %entry -; CHECK-V-NEXT: mv s0, a4 +; CHECK-V-NEXT: beqz s1, .LBB51_6 +; CHECK-V-NEXT: # %bb.5: # %entry +; CHECK-V-NEXT: slti a6, s1, 0 +; CHECK-V-NEXT: j .LBB51_7 +; CHECK-V-NEXT: .LBB51_6: +; CHECK-V-NEXT: sltu a6, s0, a3 ; CHECK-V-NEXT: .LBB51_7: # %entry -; CHECK-V-NEXT: srai a4, s1, 63 -; CHECK-V-NEXT: and a4, a4, s1 -; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: mv a5, s0 -; CHECK-V-NEXT: bltz a4, .LBB51_20 +; CHECK-V-NEXT: neg a5, a6 +; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: bnez a6, .LBB51_9 ; CHECK-V-NEXT: # %bb.8: # %entry -; CHECK-V-NEXT: srai a6, a1, 63 -; CHECK-V-NEXT: bgeu a3, s0, .LBB51_21 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB51_9: # %entry -; CHECK-V-NEXT: and a1, a6, a1 -; CHECK-V-NEXT: bne a4, a2, .LBB51_22 -; CHECK-V-NEXT: .LBB51_10: # %entry -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bltz a1, .LBB51_23 -; CHECK-V-NEXT: .LBB51_11: # %entry -; CHECK-V-NEXT: bgeu a3, a0, .LBB51_24 +; CHECK-V-NEXT: neg a4, a4 +; CHECK-V-NEXT: slli a3, a2, 63 +; CHECK-V-NEXT: beq a5, a2, .LBB51_11 +; CHECK-V-NEXT: # %bb.10: # %entry +; CHECK-V-NEXT: slti a5, a5, 0 +; CHECK-V-NEXT: xori a5, a5, 1 +; CHECK-V-NEXT: and a1, a4, a1 +; CHECK-V-NEXT: beqz a5, .LBB51_12 +; CHECK-V-NEXT: j .LBB51_13 +; CHECK-V-NEXT: .LBB51_11: +; CHECK-V-NEXT: sltu a5, a3, s0 +; CHECK-V-NEXT: and a1, a4, a1 +; CHECK-V-NEXT: bnez a5, .LBB51_13 ; CHECK-V-NEXT: .LBB51_12: # %entry -; CHECK-V-NEXT: beq a1, a2, .LBB51_14 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB51_13: # %entry -; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: .LBB51_14: # %entry +; CHECK-V-NEXT: beq a1, a2, .LBB51_15 +; CHECK-V-NEXT: # %bb.14: # %entry +; CHECK-V-NEXT: slti a1, a1, 0 +; CHECK-V-NEXT: xori a1, a1, 1 +; CHECK-V-NEXT: beqz a1, .LBB51_16 +; CHECK-V-NEXT: j .LBB51_17 +; CHECK-V-NEXT: .LBB51_15: +; CHECK-V-NEXT: sltu a1, a3, a0 +; CHECK-V-NEXT: bnez a1, .LBB51_17 +; CHECK-V-NEXT: .LBB51_16: # %entry +; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: .LBB51_17: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vmv.s.x v8, s0 @@ -6449,42 +6297,6 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: addi sp, sp, 32 ; CHECK-V-NEXT: ret -; CHECK-V-NEXT: .LBB51_15: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a0, a3, .LBB51_2 -; CHECK-V-NEXT: .LBB51_16: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: beqz a1, .LBB51_3 -; CHECK-V-NEXT: .LBB51_17: # %entry -; CHECK-V-NEXT: mv a0, a4 -; CHECK-V-NEXT: mv a4, s0 -; CHECK-V-NEXT: bltz s1, .LBB51_4 -; CHECK-V-NEXT: .LBB51_18: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu s0, a3, 
.LBB51_5 -; CHECK-V-NEXT: .LBB51_19: # %entry -; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: bnez s1, .LBB51_6 -; CHECK-V-NEXT: j .LBB51_7 -; CHECK-V-NEXT: .LBB51_20: # %entry -; CHECK-V-NEXT: mv a5, a3 -; CHECK-V-NEXT: srai a6, a1, 63 -; CHECK-V-NEXT: bltu a3, s0, .LBB51_9 -; CHECK-V-NEXT: .LBB51_21: # %entry -; CHECK-V-NEXT: mv s0, a3 -; CHECK-V-NEXT: and a1, a6, a1 -; CHECK-V-NEXT: beq a4, a2, .LBB51_10 -; CHECK-V-NEXT: .LBB51_22: # %entry -; CHECK-V-NEXT: mv s0, a5 -; CHECK-V-NEXT: mv a4, a0 -; CHECK-V-NEXT: bgez a1, .LBB51_11 -; CHECK-V-NEXT: .LBB51_23: # %entry -; CHECK-V-NEXT: mv a4, a3 -; CHECK-V-NEXT: bltu a3, a0, .LBB51_12 -; CHECK-V-NEXT: .LBB51_24: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: bne a1, a2, .LBB51_13 -; CHECK-V-NEXT: j .LBB51_14 entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -6515,20 +6327,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: snez a2, a1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a0, a1, a0 ; CHECK-NOV-NEXT: snez a1, s2 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a1, a1, s1 -; CHECK-NOV-NEXT: addi s2, s2, -1 -; CHECK-NOV-NEXT: seqz a2, s2 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -6557,20 +6361,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: snez a2, a1 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a0, a2, a0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: snez a1, s2 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a1, a1, s1 -; CHECK-V-NEXT: addi s2, s2, -1 -; CHECK-V-NEXT: seqz a2, s2 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a1, a2, a1 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v9, a1 ; CHECK-V-NEXT: vmv.s.x v8, a0 @@ -6619,20 +6415,12 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB53_4: # %entry -; CHECK-NOV-NEXT: slti a4, a1, 1 -; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: and a0, a4, a0 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: seqz a1, a1 -; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: slti a1, a1, 1 +; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a0 ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 -; CHECK-NOV-NEXT: addi s1, s1, -1 -; CHECK-NOV-NEXT: seqz a4, s1 -; CHECK-NOV-NEXT: addi a4, a4, -1 -; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: slti a3, a3, 0 ; CHECK-NOV-NEXT: addi a3, a3, -1 ; CHECK-NOV-NEXT: and a0, a3, a0 @@ -6676,20 +6464,12 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a3, 1 ; CHECK-V-NEXT: .LBB53_4: # %entry -; CHECK-V-NEXT: slti a4, a1, 1 -; CHECK-V-NEXT: neg a4, a4 -; 
CHECK-V-NEXT: and a0, a4, a0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: seqz a1, a1 -; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: slti a1, a1, 1 +; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: slti a1, s1, 1 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a1, a1, s0 -; CHECK-V-NEXT: addi s1, s1, -1 -; CHECK-V-NEXT: seqz a4, s1 -; CHECK-V-NEXT: addi a4, a4, -1 -; CHECK-V-NEXT: and a1, a4, a1 ; CHECK-V-NEXT: slti a3, a3, 0 ; CHECK-V-NEXT: addi a3, a3, -1 ; CHECK-V-NEXT: and a1, a3, a1 diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index d4c5b47..94210d7 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -1806,115 +1806,62 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: csel r1, r3, r0, mi -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: it ne -; CHECK-NEXT: andne.w r0, r2, r0, asr #31 -; CHECK-NEXT: mvn r11, #-2147483648 -; CHECK-NEXT: ands r0, r1 -; CHECK-NEXT: cmp r6, r11 -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: add.w r3, r0, #1 -; CHECK-NEXT: csel r0, r6, r11, lo +; CHECK-NEXT: vmov r12, lr, d8 +; CHECK-NEXT: subs.w r4, r0, #-1 +; CHECK-NEXT: mvn r9, #-2147483648 +; CHECK-NEXT: sbcs.w r4, r1, r9 +; CHECK-NEXT: sbcs r4, r2, #0 +; CHECK-NEXT: mov.w r7, #-1 +; CHECK-NEXT: sbcs r4, r3, #0 +; CHECK-NEXT: mov.w r10, #-2147483648 +; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r3, r3, r4, ne +; CHECK-NEXT: csel r2, r2, r4, ne +; CHECK-NEXT: csel r4, r0, r7, ne +; CHECK-NEXT: csel r1, r1, r9, ne +; CHECK-NEXT: rsbs r0, r4, #0 +; CHECK-NEXT: sbcs.w r0, r10, r1 +; CHECK-NEXT: sbcs.w r0, r7, r2 +; CHECK-NEXT: sbcs.w r0, r7, r3 +; CHECK-NEXT: cset r5, lt ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r7, r6, r11, mi -; CHECK-NEXT: orrs r2, r5 -; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: csel r8, r0, r7, eq -; CHECK-NEXT: mov.w r2, #-2147483648 -; CHECK-NEXT: cmp.w r1, #-1 -; CHECK-NEXT: csel r0, r8, r2, gt -; CHECK-NEXT: cmp.w r8, #-2147483648 -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: csel r1, r8, r2, hi -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: csel r8, r1, r10, ne +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: cmp r1, r11 -; CHECK-NEXT: mov lr, r0 -; CHECK-NEXT: csel r7, r1, r11, lo -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: csel r4, r1, r11, mi -; CHECK-NEXT: orrs r3, r2 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: csel r7, r7, r4, eq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r4, r0, r9, mi -; CHECK-NEXT: mov.w r3, #-2147483648 
-; CHECK-NEXT: cmp.w r4, #-1 -; CHECK-NEXT: csel r9, r7, r3, gt -; CHECK-NEXT: cmp.w r7, #-2147483648 -; CHECK-NEXT: csel r12, r7, r3, hi -; CHECK-NEXT: mov r3, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: andne.w r3, r2, r3, asr #31 -; CHECK-NEXT: and.w r2, r3, r4 -; CHECK-NEXT: mov.w r3, #-1 -; CHECK-NEXT: adds r2, #1 -; CHECK-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-NEXT: csel r12, r12, r9, eq -; CHECK-NEXT: cmp r6, r11 -; CHECK-NEXT: csel r6, r10, r3, lo -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: csel r6, r10, r6, eq -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r5, r10, r3, mi -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r5, r6, r5, eq -; CHECK-NEXT: cmp.w r8, #-2147483648 -; CHECK-NEXT: mov.w r8, #0 -; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: csel r6, r5, r8, hi -; CHECK-NEXT: csel r6, r5, r6, eq -; CHECK-NEXT: cmp.w r2, #-1 -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: csel r5, r5, r8, gt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: csel r5, r6, r5, eq -; CHECK-NEXT: cmp r1, r11 -; CHECK-NEXT: csel r1, lr, r3, lo -; CHECK-NEXT: csel r1, lr, r1, eq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r0, lr, r3, mi +; CHECK-NEXT: subs.w r6, r0, #-1 +; CHECK-NEXT: sbcs.w r6, r1, r9 +; CHECK-NEXT: sbcs r6, r2, #0 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: cset r6, lt +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: csel r0, r0, r7, ne +; CHECK-NEXT: csel r1, r1, r9, ne +; CHECK-NEXT: csel r3, r3, r6, ne +; CHECK-NEXT: csel r2, r2, r6, ne +; CHECK-NEXT: rsbs r6, r0, #0 +; CHECK-NEXT: sbcs.w r6, r10, r1 +; CHECK-NEXT: sbcs.w r2, r7, r2 +; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: cmp.w r7, #-2147483648 -; CHECK-NEXT: csel r1, r0, r8, hi -; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload -; CHECK-NEXT: csel r1, r0, r1, eq -; CHECK-NEXT: cmp.w r4, #-1 -; CHECK-NEXT: csel r0, r0, r8, gt +; CHECK-NEXT: csel r1, r1, r10, ne +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csel r3, r4, r5, ne ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[3], q0[1], r12, r0 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r3 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1926,47 +1873,38 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: utest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmov r4, r1, d8 -; CHECK-NEXT: eor r6, r2, #1 ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr.w r6, r6, r3 -; CHECK-NEXT: cset r7, lo -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel 
r0, r0, r7, ne +; CHECK-NEXT: cset r6, lo ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r5, r0, r6, ne +; CHECK-NEXT: csel r7, r0, r6, ne ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: eor r4, r2, #1 ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr.w r4, r4, r3 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r3, r8, r7, ne ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r3, r6, ne +; CHECK-NEXT: csel r3, r5, r6, ne ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %conv = fptoui <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1977,64 +1915,49 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vmov r12, r1, d8 -; CHECK-NEXT: eor r7, r2, #1 +; CHECK-NEXT: vmov r4, r1, d8 ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr.w r7, r7, r3 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne +; CHECK-NEXT: cset r7, lt ; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: csel r6, r0, r7, ne -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r10, r3, r9, mi -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: csel r5, r3, r7, ne +; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r6, #0 -; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: eor r4, r2, #1 ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr.w r4, r4, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r3, r3, r2, ne ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r3, r3, r9, mi ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r0, #0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 -; CHECK-NEXT: csel r5, r8, r5, ne ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r7, r5, r7, ne -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 +; CHECK-NEXT: csel r7, r8, r7, ne +; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r7, #0 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r1, r1, r4, ne ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r1, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> 
@llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2048,105 +1971,56 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #28 -; CHECK-NEXT: sub sp, #28 -; CHECK-NEXT: vmov r4, r0, d0 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vmov r9, r0, d0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: csel r1, r3, r0, mi -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: it ne -; CHECK-NEXT: andne.w r0, r2, r0, asr #31 +; CHECK-NEXT: subs.w r7, r0, #-1 ; CHECK-NEXT: mvn r10, #-2147483648 -; CHECK-NEXT: ands r0, r1 -; CHECK-NEXT: cmp r6, r10 -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: add.w r3, r0, #1 -; CHECK-NEXT: csel r0, r6, r10, lo -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r7, r6, r10, mi -; CHECK-NEXT: orrs r2, r5 -; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: csel r8, r0, r7, eq -; CHECK-NEXT: mov.w r2, #-2147483648 -; CHECK-NEXT: cmp.w r1, #-1 -; CHECK-NEXT: csel r0, r8, r2, gt -; CHECK-NEXT: cmp.w r8, #-2147483648 -; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: csel r1, r8, r2, hi -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: sbcs.w r7, r1, r10 +; CHECK-NEXT: mov.w r4, #-1 +; CHECK-NEXT: sbcs r7, r2, #0 +; CHECK-NEXT: mov.w r11, #-2147483648 +; CHECK-NEXT: sbcs r7, r3, #0 +; CHECK-NEXT: cset r7, lt +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csel r5, r0, r4, ne +; CHECK-NEXT: csel r3, r3, r7, ne +; CHECK-NEXT: csel r2, r2, r7, ne +; CHECK-NEXT: csel r1, r1, r10, ne +; CHECK-NEXT: rsbs r0, r5, #0 +; CHECK-NEXT: sbcs.w r0, r11, r1 +; CHECK-NEXT: sbcs.w r0, r4, r2 +; CHECK-NEXT: sbcs.w r0, r4, r3 +; CHECK-NEXT: cset r6, lt +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: csel r8, r1, r11, ne ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: cmp r1, r10 -; CHECK-NEXT: mov lr, r0 -; CHECK-NEXT: csel r7, r1, r10, lo -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: csel r4, r1, r10, mi -; CHECK-NEXT: orrs.w r3, r2, r0 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: csel r7, r7, r4, eq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r4, r0, r9, mi -; CHECK-NEXT: mov.w r3, #-2147483648 -; CHECK-NEXT: cmp.w r4, #-1 -; CHECK-NEXT: csel r9, r7, r3, gt -; CHECK-NEXT: cmp.w r7, #-2147483648 -; CHECK-NEXT: csel r12, r7, r3, hi -; CHECK-NEXT: mov r3, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: andne.w r3, r2, r3, asr #31 -; CHECK-NEXT: and.w r2, r3, r4 -; CHECK-NEXT: mov.w r3, #-1 -; CHECK-NEXT: adds r2, #1 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: csel r12, r12, r9, eq -; CHECK-NEXT: cmp r6, r10 -; CHECK-NEXT: csel r6, r11, r3, lo -; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: csel r6, r11, r6, eq -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r5, r11, r3, mi -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r5, r6, r5, eq -; CHECK-NEXT: cmp.w r8, #-2147483648 -; CHECK-NEXT: mov.w r8, #0 -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: csel r6, r5, r8, hi -; CHECK-NEXT: csel r6, r5, r6, eq -; CHECK-NEXT: cmp.w r2, #-1 -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: csel r5, r5, r8, gt -; 
CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: csel r5, r6, r5, eq -; CHECK-NEXT: cmp r1, r10 -; CHECK-NEXT: csel r1, lr, r3, lo -; CHECK-NEXT: csel r1, lr, r1, eq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r0, lr, r3, mi +; CHECK-NEXT: subs.w r7, r0, #-1 +; CHECK-NEXT: sbcs.w r7, r1, r10 +; CHECK-NEXT: sbcs r7, r2, #0 +; CHECK-NEXT: sbcs r7, r3, #0 +; CHECK-NEXT: cset r7, lt +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csel r0, r0, r4, ne +; CHECK-NEXT: csel r1, r1, r10, ne +; CHECK-NEXT: csel r3, r3, r7, ne +; CHECK-NEXT: csel r2, r2, r7, ne +; CHECK-NEXT: rsbs r7, r0, #0 +; CHECK-NEXT: sbcs.w r7, r11, r1 +; CHECK-NEXT: sbcs.w r2, r4, r2 +; CHECK-NEXT: sbcs.w r2, r4, r3 +; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: cmp.w r7, #-2147483648 -; CHECK-NEXT: csel r1, r0, r8, hi -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: csel r1, r0, r1, eq -; CHECK-NEXT: cmp.w r4, #-1 -; CHECK-NEXT: csel r0, r0, r8, gt +; CHECK-NEXT: csel r1, r1, r11, ne +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: csel r3, r5, r6, ne ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: vmov q0[3], q0[1], r12, r0 -; CHECK-NEXT: add sp, #28 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r3 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 +; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> @@ -2159,42 +2033,33 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: utest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: vmov r5, r0, d0 +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vmov r4, r0, d0 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: eor r1, r2, #1 -; CHECK-NEXT: orr.w r6, r1, r3 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: cset r7, lo -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne +; CHECK-NEXT: cset r6, lo ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r4, r0, r6, ne -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: csel r7, r0, r6, ne +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: eor r5, r2, #1 ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr.w r5, r5, r3 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r3, r8, r7, ne ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r3, r6, ne +; CHECK-NEXT: csel r3, r5, r6, ne ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %conv = fptoui <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2205,62 +2070,44 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; 
CHECK-LABEL: ustest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: vmov r9, r0, d0 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: vmov r5, r0, d0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: eor r1, r2, #1 -; CHECK-NEXT: orr.w r7, r1, r3 ; CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov.w r10, #0 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r4, ne +; CHECK-NEXT: cset r7, lt ; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: csel r6, r0, r7, ne -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r11, r3, r10, mi -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: csel r4, r3, r7, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r6, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: eor r5, r2, #1 ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr.w r5, r5, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r3, r3, r2, ne ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r3, r3, r10, mi ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r0, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 -; CHECK-NEXT: csel r4, r8, r4, ne ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r7, r4, r7, ne -; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 +; CHECK-NEXT: csel r7, r8, r7, ne +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r7, #0 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r1, r1, r5, ne ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r1, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2272,115 +2119,60 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: stest_f16i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: csel r1, r3, r0, mi -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: it ne -; CHECK-NEXT: andne.w r0, r2, r0, asr #31 -; CHECK-NEXT: mvn r11, #-2147483648 -; CHECK-NEXT: ands r0, r1 -; CHECK-NEXT: cmp r6, r11 -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: add.w r3, r0, #1 -; CHECK-NEXT: csel r0, r6, r11, lo -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r7, r6, r11, mi -; CHECK-NEXT: orrs r2, r5 -; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: csel r8, r0, r7, eq -; CHECK-NEXT: 
mov.w r2, #-2147483648 -; CHECK-NEXT: cmp.w r1, #-1 -; CHECK-NEXT: csel r0, r8, r2, gt -; CHECK-NEXT: cmp.w r8, #-2147483648 -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: csel r1, r8, r2, hi -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: subs.w r7, r0, #-1 +; CHECK-NEXT: mvn r9, #-2147483648 +; CHECK-NEXT: sbcs.w r7, r1, r9 +; CHECK-NEXT: mov.w r10, #-2147483648 +; CHECK-NEXT: sbcs r7, r2, #0 +; CHECK-NEXT: sbcs r7, r3, #0 +; CHECK-NEXT: cset r7, lt +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csel r3, r3, r7, ne +; CHECK-NEXT: csel r2, r2, r7, ne +; CHECK-NEXT: mov.w r7, #-1 +; CHECK-NEXT: csel r1, r1, r9, ne +; CHECK-NEXT: csel r4, r0, r7, ne +; CHECK-NEXT: rsbs r0, r4, #0 +; CHECK-NEXT: sbcs.w r0, r10, r1 +; CHECK-NEXT: sbcs.w r0, r7, r2 +; CHECK-NEXT: sbcs.w r0, r7, r3 +; CHECK-NEXT: cset r5, lt ; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: cmp r1, r11 -; CHECK-NEXT: mov lr, r0 -; CHECK-NEXT: csel r7, r1, r11, lo -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: csel r4, r1, r11, mi -; CHECK-NEXT: orrs r3, r2 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: csel r7, r7, r4, eq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r4, r0, r9, mi -; CHECK-NEXT: mov.w r3, #-2147483648 -; CHECK-NEXT: cmp.w r4, #-1 -; CHECK-NEXT: csel r9, r7, r3, gt -; CHECK-NEXT: cmp.w r7, #-2147483648 -; CHECK-NEXT: csel r12, r7, r3, hi -; CHECK-NEXT: mov r3, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: andne.w r3, r2, r3, asr #31 -; CHECK-NEXT: and.w r2, r3, r4 -; CHECK-NEXT: mov.w r3, #-1 -; CHECK-NEXT: adds r2, #1 -; CHECK-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-NEXT: csel r12, r12, r9, eq -; CHECK-NEXT: cmp r6, r11 -; CHECK-NEXT: csel r6, r10, r3, lo -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: csel r6, r10, r6, eq ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r5, r10, r3, mi -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r5, r6, r5, eq -; CHECK-NEXT: cmp.w r8, #-2147483648 -; CHECK-NEXT: mov.w r8, #0 -; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: csel r6, r5, r8, hi -; CHECK-NEXT: csel r6, r5, r6, eq -; CHECK-NEXT: cmp.w r2, #-1 -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: csel r5, r5, r8, gt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: csel r5, r6, r5, eq -; CHECK-NEXT: cmp r1, r11 -; CHECK-NEXT: csel r1, lr, r3, lo -; CHECK-NEXT: csel r1, lr, r1, eq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r0, lr, r3, mi +; CHECK-NEXT: csel r8, r1, r10, ne +; CHECK-NEXT: bl __fixhfti +; CHECK-NEXT: subs.w r6, r0, #-1 +; CHECK-NEXT: sbcs.w r6, r1, r9 +; CHECK-NEXT: sbcs r6, r2, #0 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: cset r6, lt +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: csel r0, r0, r7, ne +; CHECK-NEXT: csel r1, r1, r9, ne +; CHECK-NEXT: csel r3, r3, r6, ne +; CHECK-NEXT: csel r2, r2, r6, ne +; CHECK-NEXT: rsbs r6, r0, #0 +; CHECK-NEXT: sbcs.w r6, r10, r1 +; CHECK-NEXT: sbcs.w r2, r7, r2 +; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: cmp.w r7, #-2147483648 -; CHECK-NEXT: csel r1, r0, r8, hi -; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload -; CHECK-NEXT: csel r1, r0, r1, eq -; CHECK-NEXT: cmp.w r4, #-1 -; CHECK-NEXT: csel r0, r0, r8, gt +; CHECK-NEXT: csel r1, r1, r10, ne +; CHECK-NEXT: cmp 
r5, #0 +; CHECK-NEXT: csel r3, r4, r5, ne ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[3], q0[1], r12, r0 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r3 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2417,63 +2209,51 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: eor r1, r2, #1 -; CHECK-NEXT: orr.w r6, r1, r3 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne +; CHECK-NEXT: cset r6, lt ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: csel r5, r0, r6, ne -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r10, r3, r9, mi +; CHECK-NEXT: csel r7, r3, r6, ne ; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r5, #0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: eor r4, r2, #1 ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: orr.w r4, r4, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r3, r3, r2, ne ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r3, r3, r9, mi ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r0, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: csel r7, r8, r7, ne ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r7, r7, r6, ne -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: csel r6, r4, r6, ne +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r7, #0 +; CHECK-NEXT: movmi r6, #0 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r1, r1, r4, ne ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r6 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) diff --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll index 19ec01b..f2c8440 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll @@ -46,13 +46,12 @@ declare i64 @llvm.smax.i64(i64 %a, i64 %b) readnone define arm_aapcs_vfpcc i64 @smaxi64(i64 
%a, i64 %b) { ; CHECK-LABEL: smaxi64: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r12, r0, r2, gt -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r0, r2, hi -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r0, r0, r12, eq -; CHECK-NEXT: csel r1, r1, r3, gt +; CHECK-NEXT: subs.w r12, r2, r0 +; CHECK-NEXT: sbcs.w r12, r3, r1 +; CHECK-NEXT: cset r12, lt +; CHECK-NEXT: cmp.w r12, #0 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: bx lr %c = call i64 @llvm.smax.i64(i64 %a, i64 %b) ret i64 %c @@ -204,13 +203,12 @@ define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r12, r0, r2, gt -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r0, r2, hi -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r0, r0, r12, eq -; CHECK-NEXT: csel r1, r1, r3, gt +; CHECK-NEXT: subs.w r12, r2, r0 +; CHECK-NEXT: sbcs.w r12, r3, r1 +; CHECK-NEXT: cset r12, lt +; CHECK-NEXT: cmp.w r12, #0 +; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, ne ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: add sp, #8 @@ -334,13 +332,12 @@ declare i64 @llvm.umax.i64(i64 %a, i64 %b) readnone define arm_aapcs_vfpcc i64 @umaxi64(i64 %a, i64 %b) { ; CHECK-LABEL: umaxi64: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r12, r0, r2, hi -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r0, r2, hi -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r0, r0, r12, eq -; CHECK-NEXT: csel r1, r1, r3, hi +; CHECK-NEXT: subs.w r12, r2, r0 +; CHECK-NEXT: sbcs.w r12, r3, r1 +; CHECK-NEXT: cset r12, lo +; CHECK-NEXT: cmp.w r12, #0 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: bx lr %c = call i64 @llvm.umax.i64(i64 %a, i64 %b) ret i64 %c @@ -485,13 +482,12 @@ define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r12, r0, r2, hi -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r0, r2, hi -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r0, r0, r12, eq -; CHECK-NEXT: csel r1, r1, r3, hi +; CHECK-NEXT: subs.w r12, r2, r0 +; CHECK-NEXT: sbcs.w r12, r3, r1 +; CHECK-NEXT: cset r12, lo +; CHECK-NEXT: cmp.w r12, #0 +; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, ne ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: add sp, #8 @@ -615,13 +611,12 @@ declare i64 @llvm.smin.i64(i64 %a, i64 %b) readnone define arm_aapcs_vfpcc i64 @smini64(i64 %a, i64 %b) { ; CHECK-LABEL: smini64: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r12, r0, r2, lt -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r0, r2, lo -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r0, r0, r12, eq -; CHECK-NEXT: csel r1, r1, r3, lt +; CHECK-NEXT: subs.w r12, r0, r2 +; CHECK-NEXT: sbcs.w r12, r1, r3 +; CHECK-NEXT: cset r12, lt +; CHECK-NEXT: cmp.w r12, #0 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: bx lr %c = call i64 @llvm.smin.i64(i64 %a, i64 %b) ret i64 %c @@ -773,13 +768,12 @@ define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r12, r0, r2, lt -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r0, r2, lo -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r0, r0, r12, eq -; 
CHECK-NEXT: csel r1, r1, r3, lt +; CHECK-NEXT: subs.w r12, r0, r2 +; CHECK-NEXT: sbcs.w r12, r1, r3 +; CHECK-NEXT: cset r12, lt +; CHECK-NEXT: cmp.w r12, #0 +; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, ne ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: add sp, #8 @@ -903,13 +897,12 @@ declare i64 @llvm.umin.i64(i64 %a, i64 %b) readnone define arm_aapcs_vfpcc i64 @umini64(i64 %a, i64 %b) { ; CHECK-LABEL: umini64: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r12, r0, r2, lo -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r0, r2, lo -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r0, r0, r12, eq -; CHECK-NEXT: csel r1, r1, r3, lo +; CHECK-NEXT: subs.w r12, r0, r2 +; CHECK-NEXT: sbcs.w r12, r1, r3 +; CHECK-NEXT: cset r12, lo +; CHECK-NEXT: cmp.w r12, #0 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: bx lr %c = call i64 @llvm.umin.i64(i64 %a, i64 %b) ret i64 %c @@ -1054,13 +1047,12 @@ define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r12, r0, r2, lo -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: csel r0, r0, r2, lo -; CHECK-NEXT: cmp r1, r3 -; CHECK-NEXT: csel r0, r0, r12, eq -; CHECK-NEXT: csel r1, r1, r3, lo +; CHECK-NEXT: subs.w r12, r0, r2 +; CHECK-NEXT: sbcs.w r12, r1, r3 +; CHECK-NEXT: cset r12, lo +; CHECK-NEXT: cmp.w r12, #0 +; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, ne ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: add sp, #8 diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll index 8f5ce97..8983807 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll @@ -495,14 +495,14 @@ define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: vmov r2, r12, d1 -; CHECK-NEXT: vmov r3, lr, d0 -; CHECK-NEXT: cmp r3, r2 -; CHECK-NEXT: csel r4, r3, r2, lo -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r2, r3, r2, lo -; CHECK-NEXT: csel r3, lr, r12, lo -; CHECK-NEXT: csel r2, r4, r2, eq +; CHECK-NEXT: vmov r12, lr, d1 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: subs.w r4, r2, r12 +; CHECK-NEXT: sbcs.w r4, r3, lr +; CHECK-NEXT: cset r4, lo +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r2, r2, r12, ne +; CHECK-NEXT: csel r3, r3, lr, ne ; CHECK-NEXT: subs r4, r2, r0 ; CHECK-NEXT: sbcs.w r4, r3, r1 ; CHECK-NEXT: cset r4, lo @@ -521,14 +521,14 @@ define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: vmov r2, r12, d1 -; CHECK-NEXT: vmov r3, lr, d0 -; CHECK-NEXT: cmp r3, r2 -; CHECK-NEXT: csel r4, r3, r2, lo -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r2, r3, r2, lt -; CHECK-NEXT: csel r3, lr, r12, lt -; CHECK-NEXT: csel r2, r4, r2, eq +; CHECK-NEXT: vmov r12, lr, d1 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: subs.w r4, r2, r12 +; CHECK-NEXT: sbcs.w r4, r3, lr +; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r2, r2, r12, ne +; CHECK-NEXT: csel r3, r3, lr, ne ; CHECK-NEXT: subs r4, r2, r0 ; CHECK-NEXT: sbcs.w r4, r3, r1 ; CHECK-NEXT: cset r4, lt @@ -547,14 +547,14 @@ define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) { ; CHECK: @ %bb.0: ; CHECK-NEXT: 
.save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: vmov r2, r12, d1 -; CHECK-NEXT: vmov r3, lr, d0 -; CHECK-NEXT: cmp r3, r2 -; CHECK-NEXT: csel r4, r3, r2, hi -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r2, r3, r2, hi -; CHECK-NEXT: csel r3, lr, r12, hi -; CHECK-NEXT: csel r2, r4, r2, eq +; CHECK-NEXT: vmov r12, lr, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: subs.w r4, r2, r12 +; CHECK-NEXT: sbcs.w r4, r3, lr +; CHECK-NEXT: cset r4, lo +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r2, r12, r2, ne +; CHECK-NEXT: csel r3, lr, r3, ne ; CHECK-NEXT: subs r4, r0, r2 ; CHECK-NEXT: sbcs.w r4, r1, r3 ; CHECK-NEXT: cset r4, lo @@ -573,14 +573,14 @@ define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: vmov r2, r12, d1 -; CHECK-NEXT: vmov r3, lr, d0 -; CHECK-NEXT: cmp r3, r2 -; CHECK-NEXT: csel r4, r3, r2, hi -; CHECK-NEXT: cmp lr, r12 -; CHECK-NEXT: csel r2, r3, r2, gt -; CHECK-NEXT: csel r3, lr, r12, gt -; CHECK-NEXT: csel r2, r4, r2, eq +; CHECK-NEXT: vmov r12, lr, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: subs.w r4, r2, r12 +; CHECK-NEXT: sbcs.w r4, r3, lr +; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r2, r12, r2, ne +; CHECK-NEXT: csel r3, lr, r3, ne ; CHECK-NEXT: subs r4, r0, r2 ; CHECK-NEXT: sbcs.w r4, r1, r3 ; CHECK-NEXT: cset r4, lt diff --git a/llvm/test/CodeGen/VE/Scalar/smax.ll b/llvm/test/CodeGen/VE/Scalar/smax.ll index 0f551e1..044c956 100644 --- a/llvm/test/CodeGen/VE/Scalar/smax.ll +++ b/llvm/test/CodeGen/VE/Scalar/smax.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=ve | FileCheck %s ;;; Test ‘llvm.smax.*’ intrinsic @@ -70,15 +71,18 @@ define i64 @func_smax_var_i64(i64 noundef %0, i64 noundef %1) { define i128 @func_smax_var_i128(i128 noundef %0, i128 noundef %1) { ; CHECK-LABEL: func_smax_var_i128: ; CHECK: # %bb.0: +; CHECK-NEXT: cmpu.l %s4, %s1, %s3 ; CHECK-NEXT: cmps.l %s5, %s1, %s3 -; CHECK-NEXT: or %s4, 0, %s2 -; CHECK-NEXT: cmov.l.gt %s4, %s0, %s5 +; CHECK-NEXT: or %s6, 0, (0)1 +; CHECK-NEXT: or %s7, 0, (0)1 +; CHECK-NEXT: cmov.l.gt %s7, (63)0, %s5 ; CHECK-NEXT: cmpu.l %s5, %s0, %s2 -; CHECK-NEXT: cmov.l.gt %s2, %s0, %s5 -; CHECK-NEXT: cmpu.l %s0, %s1, %s3 -; CHECK-NEXT: cmov.l.eq %s4, %s2, %s0 -; CHECK-NEXT: maxs.l %s1, %s1, %s3 -; CHECK-NEXT: or %s0, 0, %s4 +; CHECK-NEXT: cmov.l.gt %s6, (63)0, %s5 +; CHECK-NEXT: cmov.l.eq %s7, %s6, %s4 +; CHECK-NEXT: cmov.w.ne %s2, %s0, %s7 +; CHECK-NEXT: cmov.w.ne %s3, %s1, %s7 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: or %s1, 0, %s3 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call i128 @llvm.smax.i128(i128 %0, i128 %1) ret i128 %3 @@ -239,13 +243,15 @@ define i64 @func_smax_fore_const_i64(i64 noundef %0) { define i128 @func_smax_fore_const_i128(i128 noundef %0) { ; CHECK-LABEL: func_smax_fore_const_i128: ; CHECK: # %bb.0: -; CHECK-NEXT: or %s2, 0, %s0 -; CHECK-NEXT: cmov.l.le %s2, (56)0, %s1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: or %s4, 0, (0)1 +; CHECK-NEXT: cmov.l.gt %s4, (63)0, %s3 ; CHECK-NEXT: cmpu.l %s3, %s0, (56)0 -; CHECK-NEXT: cmov.l.le %s0, (56)0, %s3 -; CHECK-NEXT: cmov.l.eq %s2, %s0, %s1 -; CHECK-NEXT: maxs.l %s1, 0, %s1 -; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: cmov.l.gt %s2, (63)0, %s3 +; CHECK-NEXT: cmov.l.eq %s4, %s2, %s1 +; CHECK-NEXT: cmov.w.eq %s0, (56)0, %s4 +; CHECK-NEXT: cmov.w.eq %s1, (0)1, %s4 ; CHECK-NEXT: b.l.t (, %s10) %2 
= tail call i128 @llvm.smax.i128(i128 %0, i128 255) ret i128 %2 @@ -298,13 +304,15 @@ define i64 @func_smax_back_const_i64(i64 noundef %0) { define i128 @func_smax_back_const_i128(i128 noundef %0) { ; CHECK-LABEL: func_smax_back_const_i128: ; CHECK: # %bb.0: -; CHECK-NEXT: or %s2, 0, %s0 -; CHECK-NEXT: cmov.l.le %s2, (56)0, %s1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: or %s4, 0, (0)1 +; CHECK-NEXT: cmov.l.gt %s4, (63)0, %s3 ; CHECK-NEXT: cmpu.l %s3, %s0, (56)0 -; CHECK-NEXT: cmov.l.le %s0, (56)0, %s3 -; CHECK-NEXT: cmov.l.eq %s2, %s0, %s1 -; CHECK-NEXT: maxs.l %s1, 0, %s1 -; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: cmov.l.gt %s2, (63)0, %s3 +; CHECK-NEXT: cmov.l.eq %s4, %s2, %s1 +; CHECK-NEXT: cmov.w.eq %s0, (56)0, %s4 +; CHECK-NEXT: cmov.w.eq %s1, (0)1, %s4 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call i128 @llvm.smax.i128(i128 %0, i128 255) ret i128 %2 diff --git a/llvm/test/CodeGen/VE/Scalar/smin.ll b/llvm/test/CodeGen/VE/Scalar/smin.ll index a19e167..b58b852 100644 --- a/llvm/test/CodeGen/VE/Scalar/smin.ll +++ b/llvm/test/CodeGen/VE/Scalar/smin.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=ve | FileCheck %s ;;; Test ‘llvm.smin.*’ intrinsic @@ -70,15 +71,18 @@ define i64 @func_smin_var_i64(i64 noundef %0, i64 noundef %1) { define i128 @func_smin_var_i128(i128 noundef %0, i128 noundef %1) { ; CHECK-LABEL: func_smin_var_i128: ; CHECK: # %bb.0: +; CHECK-NEXT: cmpu.l %s4, %s1, %s3 ; CHECK-NEXT: cmps.l %s5, %s1, %s3 -; CHECK-NEXT: or %s4, 0, %s2 -; CHECK-NEXT: cmov.l.lt %s4, %s0, %s5 +; CHECK-NEXT: or %s6, 0, (0)1 +; CHECK-NEXT: or %s7, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s7, (63)0, %s5 ; CHECK-NEXT: cmpu.l %s5, %s0, %s2 -; CHECK-NEXT: cmov.l.lt %s2, %s0, %s5 -; CHECK-NEXT: cmpu.l %s0, %s1, %s3 -; CHECK-NEXT: cmov.l.eq %s4, %s2, %s0 -; CHECK-NEXT: mins.l %s1, %s1, %s3 -; CHECK-NEXT: or %s0, 0, %s4 +; CHECK-NEXT: cmov.l.lt %s6, (63)0, %s5 +; CHECK-NEXT: cmov.l.eq %s7, %s6, %s4 +; CHECK-NEXT: cmov.w.ne %s2, %s0, %s7 +; CHECK-NEXT: cmov.w.ne %s3, %s1, %s7 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: or %s1, 0, %s3 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call i128 @llvm.smin.i128(i128 %0, i128 %1) ret i128 %3 @@ -133,8 +137,7 @@ define i128 @func_smin_fore_zero_i128(i128 noundef %0) { ; CHECK: # %bb.0: ; CHECK-NEXT: sra.l %s2, %s1, 63 ; CHECK-NEXT: and %s0, %s2, %s0 -; CHECK-NEXT: cmov.l.eq %s0, (0)1, %s1 -; CHECK-NEXT: mins.l %s1, 0, %s1 +; CHECK-NEXT: and %s1, %s2, %s1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call i128 @llvm.smin.i128(i128 %0, i128 0) ret i128 %2 @@ -189,8 +192,7 @@ define i128 @func_smin_back_zero_i128(i128 noundef %0) { ; CHECK: # %bb.0: ; CHECK-NEXT: sra.l %s2, %s1, 63 ; CHECK-NEXT: and %s0, %s2, %s0 -; CHECK-NEXT: cmov.l.eq %s0, (0)1, %s1 -; CHECK-NEXT: mins.l %s1, 0, %s1 +; CHECK-NEXT: and %s1, %s2, %s1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call i128 @llvm.smin.i128(i128 %0, i128 0) ret i128 %2 @@ -243,13 +245,15 @@ define i64 @func_smin_fore_const_i64(i64 noundef %0) { define i128 @func_smin_fore_const_i128(i128 noundef %0) { ; CHECK-LABEL: func_smin_fore_const_i128: ; CHECK: # %bb.0: -; CHECK-NEXT: or %s2, 0, %s0 -; CHECK-NEXT: cmov.l.ge %s2, (56)0, %s1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: or %s4, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s4, (63)0, %s3 ; CHECK-NEXT: cmpu.l %s3, %s0, (56)0 -; CHECK-NEXT: cmov.l.ge %s0, (56)0, %s3 -; CHECK-NEXT: cmov.l.eq %s2, %s0, %s1 -; CHECK-NEXT: mins.l %s1, 0, 
%s1 -; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: cmov.l.lt %s2, (63)0, %s3 +; CHECK-NEXT: cmov.l.eq %s4, %s2, %s1 +; CHECK-NEXT: cmov.w.eq %s0, (56)0, %s4 +; CHECK-NEXT: cmov.w.eq %s1, (0)1, %s4 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call i128 @llvm.smin.i128(i128 %0, i128 255) ret i128 %2 @@ -302,13 +306,15 @@ define i64 @func_smin_back_const_i64(i64 noundef %0) { define i128 @func_smin_back_const_i128(i128 noundef %0) { ; CHECK-LABEL: func_smin_back_const_i128: ; CHECK: # %bb.0: -; CHECK-NEXT: or %s2, 0, %s0 -; CHECK-NEXT: cmov.l.ge %s2, (56)0, %s1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: or %s4, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s4, (63)0, %s3 ; CHECK-NEXT: cmpu.l %s3, %s0, (56)0 -; CHECK-NEXT: cmov.l.ge %s0, (56)0, %s3 -; CHECK-NEXT: cmov.l.eq %s2, %s0, %s1 -; CHECK-NEXT: mins.l %s1, 0, %s1 -; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: cmov.l.lt %s2, (63)0, %s3 +; CHECK-NEXT: cmov.l.eq %s4, %s2, %s1 +; CHECK-NEXT: cmov.w.eq %s0, (56)0, %s4 +; CHECK-NEXT: cmov.w.eq %s1, (0)1, %s4 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call i128 @llvm.smin.i128(i128 %0, i128 255) ret i128 %2 diff --git a/llvm/test/CodeGen/VE/Scalar/umax.ll b/llvm/test/CodeGen/VE/Scalar/umax.ll index 4eb3034..0c4fe0a 100644 --- a/llvm/test/CodeGen/VE/Scalar/umax.ll +++ b/llvm/test/CodeGen/VE/Scalar/umax.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=ve | FileCheck %s ;;; Test ‘llvm.umax.*’ intrinsic @@ -84,14 +85,16 @@ define i64 @func_umax_var_u64(i64 noundef %0, i64 noundef %1) { define i128 @func_umax_var_u128(i128 noundef %0, i128 noundef %1) { ; CHECK-LABEL: func_umax_var_u128: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpu.l %s5, %s1, %s3 -; CHECK-NEXT: or %s4, 0, %s2 -; CHECK-NEXT: cmov.l.gt %s4, %s0, %s5 -; CHECK-NEXT: cmpu.l %s6, %s0, %s2 -; CHECK-NEXT: cmov.l.gt %s2, %s0, %s6 -; CHECK-NEXT: cmov.l.eq %s4, %s2, %s5 -; CHECK-NEXT: cmov.l.gt %s3, %s1, %s5 -; CHECK-NEXT: or %s0, 0, %s4 +; CHECK-NEXT: cmpu.l %s4, %s1, %s3 +; CHECK-NEXT: or %s5, 0, (0)1 +; CHECK-NEXT: or %s6, 0, (0)1 +; CHECK-NEXT: cmov.l.gt %s6, (63)0, %s4 +; CHECK-NEXT: cmpu.l %s7, %s0, %s2 +; CHECK-NEXT: cmov.l.gt %s5, (63)0, %s7 +; CHECK-NEXT: cmov.l.eq %s6, %s5, %s4 +; CHECK-NEXT: cmov.w.ne %s2, %s0, %s6 +; CHECK-NEXT: cmov.w.ne %s3, %s1, %s6 +; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: or %s1, 0, %s3 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call i128 @llvm.umax.i128(i128 %0, i128 %1) diff --git a/llvm/test/CodeGen/VE/Scalar/umin.ll b/llvm/test/CodeGen/VE/Scalar/umin.ll index 21cb2a6..416ed73 100644 --- a/llvm/test/CodeGen/VE/Scalar/umin.ll +++ b/llvm/test/CodeGen/VE/Scalar/umin.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=ve | FileCheck %s ;;; Test ‘llvm.umin.*’ intrinsic @@ -84,14 +85,16 @@ define i64 @func_umin_var_u64(i64 noundef %0, i64 noundef %1) { define i128 @func_umin_var_u128(i128 noundef %0, i128 noundef %1) { ; CHECK-LABEL: func_umin_var_u128: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpu.l %s5, %s1, %s3 -; CHECK-NEXT: or %s4, 0, %s2 -; CHECK-NEXT: cmov.l.lt %s4, %s0, %s5 -; CHECK-NEXT: cmpu.l %s6, %s0, %s2 -; CHECK-NEXT: cmov.l.lt %s2, %s0, %s6 -; CHECK-NEXT: cmov.l.eq %s4, %s2, %s5 -; CHECK-NEXT: cmov.l.lt %s3, %s1, %s5 -; CHECK-NEXT: or %s0, 0, %s4 +; CHECK-NEXT: cmpu.l %s4, %s1, %s3 +; CHECK-NEXT: or %s5, 0, (0)1 +; CHECK-NEXT: or %s6, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s6, (63)0, %s4 +; CHECK-NEXT: cmpu.l %s7, %s0, 
%s2 +; CHECK-NEXT: cmov.l.lt %s5, (63)0, %s7 +; CHECK-NEXT: cmov.l.eq %s6, %s5, %s4 +; CHECK-NEXT: cmov.w.ne %s2, %s0, %s6 +; CHECK-NEXT: cmov.w.ne %s3, %s1, %s6 +; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: or %s1, 0, %s3 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call i128 @llvm.umin.i128(i128 %0, i128 %1) diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll index 531b0d3..58e3f0d 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -574,16 +574,11 @@ define i64 @utest_f64i64_cse_combine(double %x) #0 { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui double %x to i128 @@ -678,7 +673,6 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -688,10 +682,6 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 @@ -862,7 +852,6 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -872,10 +861,6 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 @@ -1497,16 +1482,11 @@ define i64 @utest_f64i64_mm(double %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui double %x to i128 @@ -1541,7 +1521,6 @@ define i64 @ustest_f64i64_mm(double %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -1551,10 +1530,6 @@ define i64 @ustest_f64i64_mm(double %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 @@ -1609,16 +1584,11 @@ define i64 @utest_f32i64_mm(float %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 
2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui float %x to i128 @@ -1653,7 +1623,6 @@ define i64 @ustest_f32i64_mm(float %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -1663,10 +1632,6 @@ define i64 @ustest_f32i64_mm(float %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 @@ -1725,16 +1690,11 @@ define i64 @utesth_f16i64_mm(half %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i128 @@ -1771,7 +1731,6 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -1781,10 +1740,6 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index 6577754..8f85575 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -2189,36 +2189,30 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i32.select +; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 5 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 4 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 63 -; CHECK-NEXT: i64.shr_s -; CHECK-NEXT: i64.and +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 4 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.eq +; CHECK-NEXT: i32.select ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.get 3 @@ -2226,36 +2220,30 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: 
i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i32.select +; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 4 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 63 -; CHECK-NEXT: i64.shr_s -; CHECK-NEXT: i64.and +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.eq +; CHECK-NEXT: i32.select ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return @@ -2309,27 +2297,17 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: @@ -2382,7 +2360,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 @@ -2392,10 +2369,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 @@ -2403,7 +2376,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -2413,10 +2385,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 @@ -2479,36 +2447,30 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i32.select +; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select ; 
CHECK-NEXT: local.tee 5 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 4 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 63 -; CHECK-NEXT: i64.shr_s -; CHECK-NEXT: i64.and +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 4 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.eq +; CHECK-NEXT: i32.select ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.get 3 @@ -2516,36 +2478,30 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i32.select +; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 4 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 63 -; CHECK-NEXT: i64.shr_s -; CHECK-NEXT: i64.and +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.eq +; CHECK-NEXT: i32.select ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return @@ -2599,27 +2555,17 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: @@ -2672,7 +2618,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 @@ -2682,10 +2627,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 @@ -2693,7 +2634,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: i64.const 0 -; 
CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -2703,10 +2643,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 @@ -2771,36 +2707,30 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: local.get 6 ; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i32.select +; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 6 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: local.get 6 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 63 -; CHECK-NEXT: i64.shr_s -; CHECK-NEXT: i64.and +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 5 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.eq +; CHECK-NEXT: i32.select ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.get 4 @@ -2808,36 +2738,30 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 9223372036854775807 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i32.select +; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 5 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const -9223372036854775808 ; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 3 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 63 -; CHECK-NEXT: i64.shr_s -; CHECK-NEXT: i64.and +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 3 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.eq +; CHECK-NEXT: i32.select ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return @@ -2893,27 +2817,17 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 6 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: 
local.get 3 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: @@ -2968,7 +2882,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 6 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 @@ -2978,10 +2891,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 @@ -2989,7 +2898,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 @@ -2999,10 +2907,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i64.select ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64.const 0 diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll index c527368..daed112 100644 --- a/llvm/test/CodeGen/X86/abds.ll +++ b/llvm/test/CodeGen/X86/abds.ll @@ -367,23 +367,21 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmpl %eax, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: cmovbl %esi, %edi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: cmoval %esi, %ebx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: cmovll %esi, %ebp -; X86-NEXT: cmovel %edi, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: sbbl %edx, %edi ; X86-NEXT: movl %edx, %edi -; X86-NEXT: cmovll %ecx, %edi -; X86-NEXT: cmovgl %esi, %eax -; X86-NEXT: cmovel %ebx, %eax -; X86-NEXT: cmovgl %ecx, %edx -; X86-NEXT: subl %ebp, %eax +; X86-NEXT: cmovll %esi, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: cmovll %ecx, %ebx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: sbbl %esi, %ebp +; X86-NEXT: cmovll %esi, %edx +; X86-NEXT: cmovll %ecx, %eax +; X86-NEXT: subl %ebx, %eax ; X86-NEXT: sbbl %edi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll index f98daa1..195a74d 100644 --- a/llvm/test/CodeGen/X86/abdu.ll +++ b/llvm/test/CodeGen/X86/abdu.ll @@ -355,23 +355,21 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmpl %eax, %esi -; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: sbbl %edx, %edi +; X86-NEXT: movl %edx, %edi ; X86-NEXT: cmovbl %esi, %edi ; X86-NEXT: movl %eax, %ebx -; X86-NEXT: cmoval %esi, %ebx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: cmovbl %esi, %ebp -; X86-NEXT: 
cmovel %edi, %ebp -; X86-NEXT: movl %edx, %edi -; X86-NEXT: cmovbl %ecx, %edi -; X86-NEXT: cmoval %esi, %eax -; X86-NEXT: cmovel %ebx, %eax -; X86-NEXT: cmoval %ecx, %edx -; X86-NEXT: subl %ebp, %eax +; X86-NEXT: cmovbl %ecx, %ebx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: sbbl %esi, %ebp +; X86-NEXT: cmovbl %esi, %edx +; X86-NEXT: cmovbl %ecx, %eax +; X86-NEXT: subl %ebx, %eax ; X86-NEXT: sbbl %edi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll index ac6d9d3..87e9a58 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat.ll @@ -1081,8 +1081,6 @@ define i64 @utest_f64i64_mm(double %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -1101,11 +1099,9 @@ define i64 @ustest_f64i64_mm(double %x) { ; CHECK-NEXT: callq __fixdfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx +; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: movl $1, %esi ; CHECK-NEXT: cmovleq %rdx, %rsi -; CHECK-NEXT: cmovgq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: popq %rcx @@ -1147,8 +1143,6 @@ define i64 @utest_f32i64_mm(float %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -1167,11 +1161,9 @@ define i64 @ustest_f32i64_mm(float %x) { ; CHECK-NEXT: callq __fixsfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx +; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: movl $1, %esi ; CHECK-NEXT: cmovleq %rdx, %rsi -; CHECK-NEXT: cmovgq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: popq %rcx @@ -1221,8 +1213,6 @@ define i64 @utesth_f16i64_mm(half %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -1241,11 +1231,9 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: callq __fixhfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx +; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: movl $1, %esi ; CHECK-NEXT: cmovleq %rdx, %rsi -; CHECK-NEXT: cmovgq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: popq %rcx diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll index 37986f6..b319d4f 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll @@ -2670,39 +2670,29 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: movq %rdx, %r14 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq __fixdfti@PLT -; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq %rcx, %rsi -; CHECK-NEXT: cmovbq %rax, %rsi -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: 
cmovnsq %rcx, %rax -; CHECK-NEXT: cmoveq %rsi, %rax -; CHECK-NEXT: cmovnsq %rdi, %rdx -; CHECK-NEXT: cmpq %rcx, %rbx -; CHECK-NEXT: movq %rcx, %rsi -; CHECK-NEXT: cmovbq %rbx, %rsi -; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: cmovsq %rbx, %rcx -; CHECK-NEXT: cmoveq %rsi, %rcx -; CHECK-NEXT: cmovsq %r14, %rdi -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: movabsq $-9223372036854775808, %rsi # imm = 0x8000000000000000 -; CHECK-NEXT: movq %rsi, %r8 -; CHECK-NEXT: cmovnsq %rcx, %r8 -; CHECK-NEXT: cmpq %rsi, %rcx -; CHECK-NEXT: cmovbeq %rsi, %rcx -; CHECK-NEXT: cmpq $-1, %rdi -; CHECK-NEXT: cmovneq %r8, %rcx -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: movq %rsi, %rdi -; CHECK-NEXT: cmovnsq %rax, %rdi +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF ; CHECK-NEXT: cmpq %rsi, %rax -; CHECK-NEXT: cmovbeq %rsi, %rax -; CHECK-NEXT: cmpq $-1, %rdx -; CHECK-NEXT: cmovneq %rdi, %rax +; CHECK-NEXT: movq %rdx, %rdi +; CHECK-NEXT: sbbq $0, %rdi +; CHECK-NEXT: cmovgeq %rcx, %rdx +; CHECK-NEXT: cmovgeq %rsi, %rax +; CHECK-NEXT: cmpq %rsi, %rbx +; CHECK-NEXT: movq %r14, %rdi +; CHECK-NEXT: sbbq $0, %rdi +; CHECK-NEXT: cmovlq %r14, %rcx +; CHECK-NEXT: cmovlq %rbx, %rsi +; CHECK-NEXT: movabsq $-9223372036854775808, %rdi # imm = 0x8000000000000000 +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: movq $-1, %r8 +; CHECK-NEXT: movq $-1, %r9 +; CHECK-NEXT: sbbq %rcx, %r9 +; CHECK-NEXT: cmovgeq %rdi, %rsi +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: sbbq %rdx, %r8 +; CHECK-NEXT: cmovgeq %rdi, %rax ; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: movq %rcx, %xmm1 +; CHECK-NEXT: movq %rsi, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 24 @@ -2740,12 +2730,8 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 ; CHECK-NEXT: cmovneq %rcx, %rbx -; CHECK-NEXT: cmpq $1, %r14 -; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: movq %rbx, %xmm0 ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -2783,20 +2769,15 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: callq __fixdfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movl $1, %edi -; CHECK-NEXT: cmovleq %rdx, %rdi ; CHECK-NEXT: cmovgq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: movl $1, %esi +; CHECK-NEXT: cmovgq %rsi, %rdx ; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: cmovleq %r14, %rsi ; CHECK-NEXT: cmovgq %rcx, %rbx -; CHECK-NEXT: cmpq $1, %r14 -; CHECK-NEXT: cmoveq %rcx, %rbx +; CHECK-NEXT: cmovleq %r14, %rsi ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rbx -; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: movq %rax, %xmm0 ; CHECK-NEXT: movq %rbx, %xmm1 @@ -2834,39 +2815,29 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: movq %rdx, %r14 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq __fixsfti@PLT -; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq %rcx, %rsi -; CHECK-NEXT: cmovbq %rax, %rsi -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovnsq %rcx, %rax -; 
CHECK-NEXT: cmoveq %rsi, %rax -; CHECK-NEXT: cmovnsq %rdi, %rdx -; CHECK-NEXT: cmpq %rcx, %rbx -; CHECK-NEXT: movq %rcx, %rsi -; CHECK-NEXT: cmovbq %rbx, %rsi -; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: cmovsq %rbx, %rcx -; CHECK-NEXT: cmoveq %rsi, %rcx -; CHECK-NEXT: cmovsq %r14, %rdi -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: movabsq $-9223372036854775808, %rsi # imm = 0x8000000000000000 -; CHECK-NEXT: movq %rsi, %r8 -; CHECK-NEXT: cmovnsq %rcx, %r8 -; CHECK-NEXT: cmpq %rsi, %rcx -; CHECK-NEXT: cmovbeq %rsi, %rcx -; CHECK-NEXT: cmpq $-1, %rdi -; CHECK-NEXT: cmovneq %r8, %rcx -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: movq %rsi, %rdi -; CHECK-NEXT: cmovnsq %rax, %rdi +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF ; CHECK-NEXT: cmpq %rsi, %rax -; CHECK-NEXT: cmovbeq %rsi, %rax -; CHECK-NEXT: cmpq $-1, %rdx -; CHECK-NEXT: cmovneq %rdi, %rax +; CHECK-NEXT: movq %rdx, %rdi +; CHECK-NEXT: sbbq $0, %rdi +; CHECK-NEXT: cmovgeq %rcx, %rdx +; CHECK-NEXT: cmovgeq %rsi, %rax +; CHECK-NEXT: cmpq %rsi, %rbx +; CHECK-NEXT: movq %r14, %rdi +; CHECK-NEXT: sbbq $0, %rdi +; CHECK-NEXT: cmovlq %r14, %rcx +; CHECK-NEXT: cmovlq %rbx, %rsi +; CHECK-NEXT: movabsq $-9223372036854775808, %rdi # imm = 0x8000000000000000 +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: movq $-1, %r8 +; CHECK-NEXT: movq $-1, %r9 +; CHECK-NEXT: sbbq %rcx, %r9 +; CHECK-NEXT: cmovgeq %rdi, %rsi +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: sbbq %rdx, %r8 +; CHECK-NEXT: cmovgeq %rdi, %rax ; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: movq %rcx, %xmm1 +; CHECK-NEXT: movq %rsi, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 24 @@ -2904,12 +2875,8 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 ; CHECK-NEXT: cmovneq %rcx, %rbx -; CHECK-NEXT: cmpq $1, %r14 -; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: movq %rbx, %xmm0 ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -2947,20 +2914,15 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: callq __fixsfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movl $1, %edi -; CHECK-NEXT: cmovleq %rdx, %rdi ; CHECK-NEXT: cmovgq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: movl $1, %esi +; CHECK-NEXT: cmovgq %rsi, %rdx ; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: cmovleq %r14, %rsi ; CHECK-NEXT: cmovgq %rcx, %rbx -; CHECK-NEXT: cmpq $1, %r14 -; CHECK-NEXT: cmoveq %rcx, %rbx +; CHECK-NEXT: cmovleq %r14, %rsi ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rbx -; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: movq %rax, %xmm0 ; CHECK-NEXT: movq %rbx, %xmm1 @@ -2998,39 +2960,29 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: movq %rdx, %r14 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq __fixhfti@PLT -; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq %rcx, %rsi -; CHECK-NEXT: cmovbq %rax, %rsi -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovnsq %rcx, %rax -; CHECK-NEXT: cmoveq %rsi, 
%rax -; CHECK-NEXT: cmovnsq %rdi, %rdx -; CHECK-NEXT: cmpq %rcx, %rbx -; CHECK-NEXT: movq %rcx, %rsi -; CHECK-NEXT: cmovbq %rbx, %rsi -; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: cmovsq %rbx, %rcx -; CHECK-NEXT: cmoveq %rsi, %rcx -; CHECK-NEXT: cmovsq %r14, %rdi -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: movabsq $-9223372036854775808, %rsi # imm = 0x8000000000000000 -; CHECK-NEXT: movq %rsi, %r8 -; CHECK-NEXT: cmovnsq %rcx, %r8 -; CHECK-NEXT: cmpq %rsi, %rcx -; CHECK-NEXT: cmovbeq %rsi, %rcx -; CHECK-NEXT: cmpq $-1, %rdi -; CHECK-NEXT: cmovneq %r8, %rcx -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: movq %rsi, %rdi -; CHECK-NEXT: cmovnsq %rax, %rdi +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF ; CHECK-NEXT: cmpq %rsi, %rax -; CHECK-NEXT: cmovbeq %rsi, %rax -; CHECK-NEXT: cmpq $-1, %rdx -; CHECK-NEXT: cmovneq %rdi, %rax +; CHECK-NEXT: movq %rdx, %rdi +; CHECK-NEXT: sbbq $0, %rdi +; CHECK-NEXT: cmovgeq %rcx, %rdx +; CHECK-NEXT: cmovgeq %rsi, %rax +; CHECK-NEXT: cmpq %rsi, %rbx +; CHECK-NEXT: movq %r14, %rdi +; CHECK-NEXT: sbbq $0, %rdi +; CHECK-NEXT: cmovlq %r14, %rcx +; CHECK-NEXT: cmovlq %rbx, %rsi +; CHECK-NEXT: movabsq $-9223372036854775808, %rdi # imm = 0x8000000000000000 +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: movq $-1, %r8 +; CHECK-NEXT: movq $-1, %r9 +; CHECK-NEXT: sbbq %rcx, %r9 +; CHECK-NEXT: cmovgeq %rdi, %rsi +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: sbbq %rdx, %r8 +; CHECK-NEXT: cmovgeq %rdi, %rax ; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: movq %rcx, %xmm1 +; CHECK-NEXT: movq %rsi, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 24 @@ -3069,12 +3021,8 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 ; CHECK-NEXT: cmovneq %rcx, %rbx -; CHECK-NEXT: cmpq $1, %r14 -; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: movq %rbx, %xmm0 ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -3112,20 +3060,15 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: callq __fixhfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movl $1, %edi -; CHECK-NEXT: cmovleq %rdx, %rdi ; CHECK-NEXT: cmovgq %rcx, %rax -; CHECK-NEXT: cmpq $1, %rdx -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: movl $1, %esi +; CHECK-NEXT: cmovgq %rsi, %rdx ; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: cmovleq %r14, %rsi ; CHECK-NEXT: cmovgq %rcx, %rbx -; CHECK-NEXT: cmpq $1, %r14 -; CHECK-NEXT: cmoveq %rcx, %rbx +; CHECK-NEXT: cmovleq %r14, %rsi ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rbx -; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: movq %rax, %xmm0 ; CHECK-NEXT: movq %rbx, %xmm1 diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll index 0d3d325..a308f85 100644 --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -309,50 +309,50 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X64-NEXT: subq $24, %rsp ; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: movq %rsi, (%rsp) # 8-byte Spill -; X64-NEXT: movq %rdi, %r15 +; X64-NEXT: movq %rdi, %r14 ; X64-NEXT: leaq (%rdi,%rdi), %rax -; X64-NEXT: movq %rdi, %r12 +; X64-NEXT: movq 
%rdi, %r15 +; X64-NEXT: sarq $63, %r15 +; X64-NEXT: shldq $31, %rax, %r15 +; X64-NEXT: shlq $32, %r14 +; X64-NEXT: movq %rsi, %r12 ; X64-NEXT: sarq $63, %r12 -; X64-NEXT: shldq $31, %rax, %r12 -; X64-NEXT: shlq $32, %r15 -; X64-NEXT: movq %rsi, %r13 -; X64-NEXT: sarq $63, %r13 -; X64-NEXT: movq %r15, %rdi -; X64-NEXT: movq %r12, %rsi -; X64-NEXT: movq %r13, %rcx +; X64-NEXT: movq %r14, %rdi +; X64-NEXT: movq %r15, %rsi +; X64-NEXT: movq %r12, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %rbx +; X64-NEXT: subq $1, %r13 ; X64-NEXT: sbbq $0, %rbp -; X64-NEXT: testq %r12, %r12 +; X64-NEXT: testq %r15, %r15 ; X64-NEXT: sets %al -; X64-NEXT: testq %r13, %r13 -; X64-NEXT: sets %r14b -; X64-NEXT: xorb %al, %r14b -; X64-NEXT: movq %r15, %rdi -; X64-NEXT: movq %r12, %rsi +; X64-NEXT: testq %r12, %r12 +; X64-NEXT: sets %bl +; X64-NEXT: xorb %al, %bl +; X64-NEXT: movq %r14, %rdi +; X64-NEXT: movq %r15, %rsi ; X64-NEXT: movq (%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq %r13, %rcx +; X64-NEXT: movq %r12, %rcx ; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al -; X64-NEXT: testb %r14b, %al +; X64-NEXT: testb %bl, %al ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: testq %rbp, %rbp -; X64-NEXT: cmovnsq %rax, %rbp -; X64-NEXT: movq $-1, %rcx -; X64-NEXT: cmovgq %rcx, %rbx +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: movq %rbp, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: andq %rbp, %rcx ; X64-NEXT: testq %rbp, %rbp -; X64-NEXT: cmovnsq %rbp, %rcx -; X64-NEXT: cmpq $-1, %rbp -; X64-NEXT: cmovlq %rax, %rbx -; X64-NEXT: shrdq $1, %rcx, %rbx -; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq $-1, %rdx +; X64-NEXT: cmovgq %rdx, %r13 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq $-1, %rcx +; X64-NEXT: cmovlq %rdx, %rcx +; X64-NEXT: cmovgeq %r13, %rax +; X64-NEXT: shrdq $1, %rcx, %rax ; X64-NEXT: addq $24, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 @@ -398,22 +398,20 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: pushl %eax ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: subl $1, %esi -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: sbbl $0, %eax +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %eax ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %ebx ; X86-NEXT: testl %edi, %edi ; X86-NEXT: sets %al ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -440,52 +438,38 @@ 
define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: setne %al ; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: movl $0, %eax -; X86-NEXT: cmovsl %ebx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: cmovsl %edx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovsl %esi, %eax -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: andl %ecx, %edi -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: cmovel %ebx, %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: cmpl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: cmpl $-1, %esi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: sbbl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: sbbl $0, %ecx ; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-NEXT: cmovbl %ecx, %edx -; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: cmovll %eax, %edx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovgel %ecx, %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: cmovgel %ecx, %ebx ; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmovsl %ecx, %esi -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: cmovnel %eax, %esi -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmpl $-2147483647, %edx # imm = 0x80000001 +; X86-NEXT: cmovgel %ecx, %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: negl %edi +; X86-NEXT: movl $-2147483648, %edi # imm = 0x80000000 +; X86-NEXT: sbbl %edx, %edi +; X86-NEXT: movl $-1, %edi +; X86-NEXT: sbbl %ebx, %edi +; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: movl $0, %eax +; X86-NEXT: cmovgel %eax, %esi ; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-NEXT: cmovael %edx, %eax -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: sarl $31, %ecx -; X86-NEXT: andl %esi, %ecx -; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000 -; X86-NEXT: cmovsl %ebx, %edx -; X86-NEXT: movl $0, %ebx -; X86-NEXT: cmovsl %ebx, %esi -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %edi -; X86-NEXT: cmovel %ecx, %esi -; X86-NEXT: cmovel %eax, %edx +; X86-NEXT: cmovgel %eax, %edx ; X86-NEXT: movl %esi, %eax ; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi @@ -574,7 +558,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: pushq %r13 ; X64-NEXT: pushq %r12 ; X64-NEXT: pushq %rbx -; X64-NEXT: subq $104, %rsp +; X64-NEXT: 
subq $120, %rsp ; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3] @@ -586,9 +570,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movq %xmm0, %rbx -; X64-NEXT: movq %rbx, %rbp -; X64-NEXT: sarq $63, %rbp -; X64-NEXT: shldq $31, %rbx, %rbp +; X64-NEXT: movq %rbx, %r13 +; X64-NEXT: sarq $63, %r13 +; X64-NEXT: shldq $31, %rbx, %r13 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; X64-NEXT: pxor %xmm0, %xmm0 ; X64-NEXT: pcmpgtd %xmm1, %xmm0 @@ -601,19 +585,19 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rbx, %r12 ; X64-NEXT: shlq $31, %r12 ; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %rbp, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq %r15, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %r13 +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %r13 +; X64-NEXT: subq $1, %rbp ; X64-NEXT: sbbq $0, %r14 ; X64-NEXT: shrq $63, %rbx ; X64-NEXT: xorl %r15d, %ebx ; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %rbp, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq %r15, %rcx ; X64-NEXT: callq __modti3@PLT @@ -621,32 +605,28 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: setne %al ; X64-NEXT: testb %bl, %al ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF -; X64-NEXT: cmpq %rdx, %r13 -; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-NEXT: cmovbq %r13, %rax +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload ; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovnsq %rdx, %r13 -; X64-NEXT: cmoveq %rax, %r13 -; X64-NEXT: cmovnsq %rcx, %r14 +; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF +; X64-NEXT: cmpq %rdx, %rbp +; X64-NEXT: movq %r14, %rax +; X64-NEXT: sbbq $0, %rax +; X64-NEXT: cmovgeq %rcx, %r14 +; X64-NEXT: cmovgeq %rdx, %rbp ; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-NEXT: cmpq %rcx, %r13 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: cmovaq %r13, %rax -; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovsq %rcx, %r13 -; X64-NEXT: cmpq $-1, %r14 -; X64-NEXT: cmoveq %rax, %r13 -; X64-NEXT: movq %r13, %xmm0 +; X64-NEXT: cmpq %rbp, %rcx +; X64-NEXT: movq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: movq $-1, %rax +; X64-NEXT: sbbq %r14, %rax +; X64-NEXT: cmovgeq %rcx, %rbp +; X64-NEXT: movq %rbp, %xmm0 ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; X64-NEXT: # xmm0 = mem[2,3,2,3] ; X64-NEXT: movq %xmm0, %rbx -; X64-NEXT: movq %rbx, %rbp -; X64-NEXT: sarq $63, %rbp -; X64-NEXT: shldq $31, %rbx, %rbp +; X64-NEXT: movq %rbx, %r13 +; X64-NEXT: sarq $63, %r13 +; X64-NEXT: shldq $31, %rbx, %r13 ; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; X64-NEXT: # xmm0 = mem[2,3,2,3] ; X64-NEXT: movq %xmm0, %rdx @@ -656,19 +636,19 @@ define 
<4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rbx, %r12 ; X64-NEXT: shlq $31, %r12 ; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %rbp, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq %r15, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %r13 +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %r13 +; X64-NEXT: subq $1, %rbp ; X64-NEXT: sbbq $0, %r14 ; X64-NEXT: shrq $63, %rbx ; X64-NEXT: xorl %r15d, %ebx ; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %rbp, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq %r15, %rcx ; X64-NEXT: callq __modti3@PLT @@ -676,25 +656,20 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: setne %al ; X64-NEXT: testb %bl, %al ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload ; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF -; X64-NEXT: cmpq %rcx, %r13 -; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-NEXT: cmovbq %r13, %rax -; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovnsq %rcx, %r13 -; X64-NEXT: cmoveq %rax, %r13 +; X64-NEXT: cmpq %rcx, %rbp +; X64-NEXT: movq %r14, %rax +; X64-NEXT: sbbq $0, %rax ; X64-NEXT: movl $0, %eax -; X64-NEXT: cmovnsq %rax, %r14 +; X64-NEXT: cmovgeq %rax, %r14 +; X64-NEXT: cmovgeq %rcx, %rbp ; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-NEXT: cmpq %rcx, %r13 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: cmovaq %r13, %rax -; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovsq %rcx, %r13 -; X64-NEXT: cmpq $-1, %r14 -; X64-NEXT: cmoveq %rax, %r13 -; X64-NEXT: movq %r13, %xmm0 +; X64-NEXT: cmpq %rbp, %rcx +; X64-NEXT: movq $-1, %rax +; X64-NEXT: sbbq %r14, %rax +; X64-NEXT: cmovgeq %rcx, %rbp +; X64-NEXT: movq %rbp, %xmm0 ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64-NEXT: psrlq $1, %xmm1 @@ -709,9 +684,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movq %xmm0, %rbx -; X64-NEXT: movq %rbx, %rbp -; X64-NEXT: sarq $63, %rbp -; X64-NEXT: shldq $31, %rbx, %rbp +; X64-NEXT: movq %rbx, %r13 +; X64-NEXT: sarq $63, %r13 +; X64-NEXT: shldq $31, %rbx, %r13 ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; X64-NEXT: pxor %xmm1, %xmm1 ; X64-NEXT: pcmpgtd %xmm0, %xmm1 @@ -724,19 +699,19 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rbx, %r12 ; X64-NEXT: shlq $31, %r12 ; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %rbp, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq %r15, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %r13 +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %r13 +; X64-NEXT: subq $1, %rbp ; X64-NEXT: sbbq $0, %r14 ; X64-NEXT: shrq $63, %rbx ; X64-NEXT: xorl %r15d, %ebx ; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %rbp, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq 
{{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq %r15, %rcx ; X64-NEXT: callq __modti3@PLT @@ -744,32 +719,27 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: setne %al ; X64-NEXT: testb %bl, %al ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload ; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF -; X64-NEXT: cmpq %rcx, %r13 -; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-NEXT: cmovbq %r13, %rax -; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovnsq %rcx, %r13 -; X64-NEXT: cmoveq %rax, %r13 +; X64-NEXT: cmpq %rcx, %rbp +; X64-NEXT: movq %r14, %rax +; X64-NEXT: sbbq $0, %rax ; X64-NEXT: movl $0, %eax -; X64-NEXT: cmovnsq %rax, %r14 +; X64-NEXT: cmovgeq %rax, %r14 +; X64-NEXT: cmovgeq %rcx, %rbp ; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-NEXT: cmpq %rcx, %r13 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: cmovaq %r13, %rax -; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovsq %rcx, %r13 -; X64-NEXT: cmpq $-1, %r14 -; X64-NEXT: cmoveq %rax, %r13 -; X64-NEXT: movq %r13, %xmm0 +; X64-NEXT: cmpq %rbp, %rcx +; X64-NEXT: movq $-1, %rax +; X64-NEXT: sbbq %r14, %rax +; X64-NEXT: cmovgeq %rcx, %rbp +; X64-NEXT: movq %rbp, %xmm0 ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; X64-NEXT: # xmm0 = mem[2,3,2,3] ; X64-NEXT: movq %xmm0, %rbx -; X64-NEXT: movq %rbx, %rbp -; X64-NEXT: sarq $63, %rbp -; X64-NEXT: shldq $31, %rbx, %rbp +; X64-NEXT: movq %rbx, %r13 +; X64-NEXT: sarq $63, %r13 +; X64-NEXT: shldq $31, %rbx, %r13 ; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; X64-NEXT: # xmm0 = mem[2,3,2,3] ; X64-NEXT: movq %xmm0, %rdx @@ -779,19 +749,19 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rbx, %r12 ; X64-NEXT: shlq $31, %r12 ; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %rbp, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq %r15, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %r13 +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %r13 +; X64-NEXT: subq $1, %rbp ; X64-NEXT: sbbq $0, %r14 ; X64-NEXT: shrq $63, %rbx ; X64-NEXT: xorl %r15d, %ebx ; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %rbp, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq %r15, %rcx ; X64-NEXT: callq __modti3@PLT @@ -799,31 +769,25 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: setne %al ; X64-NEXT: testb %bl, %al ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload ; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF -; X64-NEXT: cmpq %rcx, %r13 -; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-NEXT: cmovbq %r13, %rax -; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovnsq %rcx, %r13 -; X64-NEXT: cmoveq %rax, %r13 +; X64-NEXT: cmpq %rcx, %rbp +; X64-NEXT: movq %r14, %rax +; X64-NEXT: sbbq $0, %rax ; X64-NEXT: movl $0, %eax -; X64-NEXT: cmovnsq %rax, %r14 -; X64-NEXT: movabsq $-4294967296, 
%rcx # imm = 0xFFFFFFFF00000000 -; X64-NEXT: cmpq %rcx, %r13 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: cmovaq %r13, %rax -; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovsq %rcx, %r13 -; X64-NEXT: cmpq $-1, %r14 -; X64-NEXT: cmoveq %rax, %r13 -; X64-NEXT: movq %r13, %xmm1 +; X64-NEXT: cmovgeq %rax, %r14 +; X64-NEXT: cmovgeq %rcx, %rbp +; X64-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 +; X64-NEXT: cmpq %rbp, %rax +; X64-NEXT: sbbq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: cmovgeq %rax, %rbp +; X64-NEXT: movq %rbp, %xmm1 ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: psrlq $1, %xmm0 ; X64-NEXT: shufps $136, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; X64-NEXT: # xmm0 = xmm0[0,2],mem[0,2] -; X64-NEXT: addq $104, %rsp +; X64-NEXT: addq $120, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 ; X64-NEXT: popq %r13 @@ -840,49 +804,86 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $256, %esp # imm = 0x100 -; X86-NEXT: movl 16(%ebp), %edi +; X86-NEXT: subl $192, %esp +; X86-NEXT: movl 36(%ebp), %esi +; X86-NEXT: movl 16(%ebp), %ebx ; X86-NEXT: movl 32(%ebp), %eax -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %eax, %edi ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: leal (%edi,%edi), %eax -; X86-NEXT: shrl $31, %edi -; X86-NEXT: shldl $31, %eax, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: leal (%ebx,%ebx), %eax +; X86-NEXT: shrl $31, %ebx +; X86-NEXT: shldl $31, %eax, %ebx ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edx +; X86-NEXT: pushl %edx +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl $0 +; X86-NEXT: pushl %eax +; X86-NEXT: calll __modti3 +; X86-NEXT: addl $32, %esp +; X86-NEXT: sarl $31, %esi +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: leal (%ecx,%ecx), %edx +; X86-NEXT: shrl $31, %ecx +; X86-NEXT: shldl $31, %edx, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %eax +; X86-NEXT: pushl %eax ; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl $0 +; X86-NEXT: pushl %edx +; X86-NEXT: calll __divti3 +; X86-NEXT: addl $32, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl 32(%ebp) 
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi +; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: movl 36(%ebp), %edx +; X86-NEXT: movl 28(%ebp), %edx ; X86-NEXT: movl %edx, %ebx ; X86-NEXT: sarl $31, %ebx -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: sarl $31, %esi +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: sarl $31, %edi ; X86-NEXT: leal (%ecx,%ecx), %eax ; X86-NEXT: shrl $31, %ecx ; X86-NEXT: shldl $31, %eax, %ecx @@ -892,80 +893,43 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edx -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax -; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: movl 28(%ebp), %edx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: sarl $31, %esi -; X86-NEXT: leal (%ecx,%ecx), %eax -; X86-NEXT: shrl $31, %ecx -; X86-NEXT: shldl $31, %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edx -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi ; X86-NEXT: pushl %ecx ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax -; X86-NEXT: calll __divti3 +; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp ; X86-NEXT: movl 40(%ebp), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: sarl $31, %esi +; X86-NEXT: sarl $31, %edx ; X86-NEXT: movl 24(%ebp), %ecx -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: leal (%ecx,%ecx), %eax +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: leal (%ecx,%ecx), %esi ; X86-NEXT: shrl $31, %ecx -; X86-NEXT: shldl $31, %eax, %ecx +; X86-NEXT: shldl $31, %esi, %ecx +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi +; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl $0 +; X86-NEXT: pushl %edx +; X86-NEXT: pushl %edx +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %eax ; X86-NEXT: pushl %eax -; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl 40(%ebp) -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax +; X86-NEXT: 
pushl %ecx ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %ebx -; X86-NEXT: pushl 36(%ebp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload @@ -976,108 +940,199 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: subl $1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %edx -; X86-NEXT: sbbl $0, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: testl %ebx, %ebx ; X86-NEXT: sets %bl -; X86-NEXT: testl %edi, %edi +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %bh ; X86-NEXT: xorb %bl, %bh ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: orl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %edi, %eax -; X86-NEXT: setne %al -; X86-NEXT: testb %bh, %al -; X86-NEXT: cmovel %esi, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: orl {{[0-9]+}}(%esp), %esi +; X86-NEXT: orl %edi, %esi +; X86-NEXT: setne %bl +; X86-NEXT: testb %bh, %bl +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: xorl %edi, %edi +; X86-NEXT: cmpl $-1, %eax +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: sbbl $0, %esi +; X86-NEXT: movl %edx, %esi +; X86-NEXT: sbbl $0, %esi +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: sbbl $0, %esi +; X86-NEXT: cmovgel %edi, %ebx +; X86-NEXT: cmovgel %edi, %edx +; X86-NEXT: cmovgel %edi, %ecx +; X86-NEXT: movl $-1, %esi +; X86-NEXT: cmovgel %esi, %eax +; 
X86-NEXT: movl %eax, %esi +; X86-NEXT: negl %esi +; X86-NEXT: movl $-1, %esi +; X86-NEXT: sbbl %ecx, %esi +; X86-NEXT: movl $-1, %esi +; X86-NEXT: sbbl %edx, %esi +; X86-NEXT: movl $-1, %edx +; X86-NEXT: sbbl %ebx, %edx +; X86-NEXT: cmovgel %edi, %eax +; X86-NEXT: movl $-1, %edx +; X86-NEXT: cmovgel %edx, %ecx +; X86-NEXT: shldl $31, %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: subl $1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %eax -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %esi ; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %bl ; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %bh ; X86-NEXT: xorb %bl, %bh +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: orl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %edi, %eax -; X86-NEXT: setne %al -; X86-NEXT: testb %bh, %al -; X86-NEXT: cmovel %esi, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel %edx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: setne %cl +; X86-NEXT: testb %bh, %cl +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmpl $-1, %eax +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovgel %ecx, %esi +; X86-NEXT: cmovgel %ecx, %edx +; X86-NEXT: cmovgel %ecx, %edi +; X86-NEXT: movl $-1, %ebx +; X86-NEXT: cmovgel %ebx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: movl $-1, %ecx +; X86-NEXT: sbbl %edi, %ecx +; X86-NEXT: movl $-1, %ecx +; X86-NEXT: sbbl %edx, %ecx +; 
X86-NEXT: movl $-1, %ecx +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovgel %ecx, %eax +; X86-NEXT: cmovgel %ebx, %edi +; X86-NEXT: shldl $31, %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: subl $1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %esi +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: sets %bl +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: sets %bh +; X86-NEXT: xorb %bl, %bh +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: setne %cl +; X86-NEXT: testb %bh, %cl +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmpl $-1, %eax +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovgel %ecx, %esi +; X86-NEXT: cmovgel %ecx, %edx +; X86-NEXT: cmovgel %ecx, %ebx +; X86-NEXT: movl $-1, %edi +; X86-NEXT: cmovgel %edi, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: movl $-1, %ecx +; X86-NEXT: sbbl %ebx, %ecx +; X86-NEXT: movl $-1, %ecx +; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: movl $-1, %ecx +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovgel %ecx, %eax +; X86-NEXT: cmovgel %edi, %ebx +; X86-NEXT: shldl $31, %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: subl $1, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: testl %edx, %edx -; X86-NEXT: sets %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: testl 
%ecx, %ecx -; X86-NEXT: sets %bl -; X86-NEXT: xorb %al, %bl +; X86-NEXT: sets %al +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: testl %edx, %edx +; X86-NEXT: sets %ah +; X86-NEXT: xorb %al, %ah +; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx -; X86-NEXT: pushl 28(%ebp) ; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl %edx +; X86-NEXT: pushl %edx ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax @@ -1089,213 +1144,41 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: setne %al -; X86-NEXT: testb %bl, %al -; X86-NEXT: cmovel %edi, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: subl $1, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %eax +; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: cmpl $-1, %ebx +; X86-NEXT: movl %edi, %eax ; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sbbl $0, %eax +; X86-NEXT: movl %esi, %eax ; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %edx -; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: sets %bl -; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: sets %bh -; X86-NEXT: xorb %bl, %bh -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: orl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %edi, %eax -; X86-NEXT: setne %al -; X86-NEXT: testb %bh, %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel %esi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testl %edx, %edx ; X86-NEXT: movl $0, %eax -; X86-NEXT: cmovsl %edx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmovgel %eax, %esi +; X86-NEXT: cmovgel %eax, %ecx +; X86-NEXT: cmovgel %eax, %edi +; X86-NEXT: movl $-1, %edx +; X86-NEXT: cmovgel %edx, %ebx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: negl %eax ; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovsl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: sarl $31, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovel %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovsl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %edi, %eax ; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovel %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovsl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %ecx, %eax ; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, %eax -; X86-NEXT: cmovsl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: movl $-1, %esi -; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: sarl $31, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovel %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: andl %eax, %ebx -; X86-NEXT: negl %eax -; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %ecx, %ecx -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmovnel %esi, %ecx -; X86-NEXT: movl $0, %edx 
-; X86-NEXT: cmovel %edx, %ebx -; X86-NEXT: cmpl $-1, %ebx -; X86-NEXT: movl $0, %esi -; X86-NEXT: cmovel %ecx, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovsl %edx, %ecx -; X86-NEXT: movl $-1, %edx -; X86-NEXT: cmovsl %edx, %ebx -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %eax -; X86-NEXT: cmovel %esi, %ecx -; X86-NEXT: cmovnel %ebx, %eax -; X86-NEXT: shldl $31, %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: andl %eax, %edi -; X86-NEXT: negl %eax -; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl $0, %esi -; X86-NEXT: cmovel %esi, %edi -; X86-NEXT: cmpl $-1, %edi -; X86-NEXT: movl $0, %edx -; X86-NEXT: cmovel %eax, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: cmovsl %esi, %eax -; X86-NEXT: movl $-1, %ebx -; X86-NEXT: cmovsl %ebx, %edi -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %ecx -; X86-NEXT: cmovel %edx, %eax -; X86-NEXT: cmovnel %edi, %ecx -; X86-NEXT: shldl $31, %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: andl %eax, %edx -; X86-NEXT: negl %eax +; X86-NEXT: sbbl %esi, %eax ; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: cmovel %esi, %edx -; X86-NEXT: cmpl $-1, %edx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovel %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: cmovsl %esi, %eax -; X86-NEXT: movl $-1, %edi -; X86-NEXT: cmovsl %edi, %edx -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %ebx -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: cmovnel %edx, %ebx -; X86-NEXT: shldl $31, %eax, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: andl %eax, %edi -; X86-NEXT: negl %eax -; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: cmovel %esi, %edi -; X86-NEXT: cmpl $-1, %edi -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovel %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: testl %edx, %edx -; X86-NEXT: cmovsl %esi, %eax -; X86-NEXT: movl $-1, %esi -; X86-NEXT: cmovsl %esi, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; 
X86-NEXT: andl %edx, %esi -; X86-NEXT: cmpl $-1, %esi -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: cmovnel %edi, %esi -; X86-NEXT: shldl $31, %eax, %esi +; X86-NEXT: cmovgel %eax, %ebx +; X86-NEXT: cmovgel %edx, %edi +; X86-NEXT: shldl $31, %ebx, %edi ; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl %esi, 12(%eax) -; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl %ecx, 4(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload diff --git a/llvm/test/CodeGen/X86/smax.ll b/llvm/test/CodeGen/X86/smax.ll index 838ef46..d6906b5 100644 --- a/llvm/test/CodeGen/X86/smax.ll +++ b/llvm/test/CodeGen/X86/smax.ll @@ -120,17 +120,15 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: movl %eax, %edi -; X86-NEXT: cmoval %ecx, %edi -; X86-NEXT: cmpl %edx, %esi -; X86-NEXT: cmovgl %ecx, %eax -; X86-NEXT: cmovel %edi, %eax -; X86-NEXT: cmovgl %esi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: movl %edx, %edi +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: cmovll %ecx, %eax +; X86-NEXT: cmovll %esi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl @@ -141,13 +139,12 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind { define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X64-LABEL: test_i128: ; X64: # %bb.0: -; X64-NEXT: cmpq %rdx, %rdi ; X64-NEXT: movq %rdx, %rax -; X64-NEXT: cmovaq %rdi, %rax -; X64-NEXT: cmpq %rcx, %rsi -; X64-NEXT: cmovgq %rdi, %rdx -; X64-NEXT: cmovneq %rdx, %rax -; X64-NEXT: cmovgq %rsi, %rcx +; X64-NEXT: cmpq %rdi, %rdx +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: sbbq %rsi, %rdx +; X64-NEXT: cmovlq %rdi, %rax +; X64-NEXT: cmovlq %rsi, %rcx ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: retq ; @@ -157,52 +154,28 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl %ecx, %esi +; X86-NEXT: cmpl %ebx, %edx +; X86-NEXT: movl %esi, %ebp +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %edi, %ebp +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cmoval %esi, %eax -; X86-NEXT: cmpl %edx, %edi -; X86-NEXT: movl %ecx, %ebp -; X86-NEXT: cmoval %esi, %ebp -; X86-NEXT: cmovel %eax, %ebp -; X86-NEXT: movl %edx, %eax -; X86-NEXT: cmoval %edi, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: sbbl %ebp, %eax +; X86-NEXT: cmovll %ebx, %edx +; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi +; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi +; X86-NEXT: cmovll %ebp, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edi -; X86-NEXT: sbbl %ebx, %edi 
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: xorl %eax, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: xorl %esi, %ebx -; X86-NEXT: orl %edi, %ebx -; X86-NEXT: cmovel %ebp, %ecx -; X86-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: cmpl %esi, %ebx -; X86-NEXT: movl %esi, %edi -; X86-NEXT: cmoval %ebx, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmpl %eax, %ebp -; X86-NEXT: cmovgl %ebx, %esi -; X86-NEXT: cmovel %edi, %esi -; X86-NEXT: cmovgl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %eax, 12(%edi) -; X86-NEXT: movl %esi, 8(%edi) -; X86-NEXT: movl %edx, 4(%edi) -; X86-NEXT: movl %ecx, (%edi) -; X86-NEXT: movl %edi, %eax -; X86-NEXT: addl $4, %esp +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl %edi, 8(%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %edx, (%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -748,24 +721,22 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: shrdl $28, %edi, %ecx ; X86-NEXT: sarl $28, %edi -; X86-NEXT: cmpl %ecx, %esi -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: cmoval %esi, %ebx -; X86-NEXT: cmpl %edi, %edx -; X86-NEXT: cmovgl %esi, %ecx -; X86-NEXT: cmovel %ebx, %ecx -; X86-NEXT: cmovgl %edx, %edi +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: sbbl %esi, %ebx +; X86-NEXT: cmovll %edx, %ecx +; X86-NEXT: cmovll %esi, %edi ; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: movl %edi, 8(%eax) -; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/smin.ll b/llvm/test/CodeGen/X86/smin.ll index 9436676..2b05955 100644 --- a/llvm/test/CodeGen/X86/smin.ll +++ b/llvm/test/CodeGen/X86/smin.ll @@ -125,11 +125,9 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: movl %eax, %edi -; X86-NEXT: cmovbl %ecx, %edi -; X86-NEXT: cmpl %edx, %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: sbbl %edx, %edi ; X86-NEXT: cmovll %ecx, %eax -; X86-NEXT: cmovel %edi, %eax ; X86-NEXT: cmovll %esi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -141,12 +139,11 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind { define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X64-LABEL: test_i128: ; X64: # %bb.0: -; X64-NEXT: cmpq %rdx, %rdi ; X64-NEXT: movq %rdx, %rax -; X64-NEXT: cmovbq %rdi, %rax -; X64-NEXT: cmpq %rcx, %rsi -; X64-NEXT: cmovlq %rdi, %rdx -; X64-NEXT: cmovneq %rdx, %rax +; X64-NEXT: cmpq %rdx, %rdi +; X64-NEXT: movq %rsi, %rdx +; X64-NEXT: sbbq %rcx, %rdx +; X64-NEXT: cmovlq %rdi, %rax ; X64-NEXT: cmovlq %rsi, %rcx ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: retq @@ -157,51 +154,29 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $8, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; 
X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmpl %edx, %edi -; X86-NEXT: movl %edx, %eax -; X86-NEXT: cmovbl %edi, %eax -; X86-NEXT: cmpl %esi, %ebp -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: cmovbl %edi, %ebx -; X86-NEXT: cmovel %eax, %ebx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: cmovbl %ebp, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl %ecx, %edi -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cmovbl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl %edx, %ebx +; X86-NEXT: sbbl %esi, %ebp ; X86-NEXT: movl %eax, %ebp -; X86-NEXT: sbbl %edi, %ebp +; X86-NEXT: sbbl %ecx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: sbbl %ebp, %eax +; X86-NEXT: cmovll %ebx, %edx ; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: xorl %edi, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: orl %ebp, %eax -; X86-NEXT: cmovel %ebx, %edx -; X86-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl %edi, %eax ; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovll %eax, %edi +; X86-NEXT: cmovll %edi, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %ebp, 12(%eax) ; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: movl %esi, 4(%eax) ; X86-NEXT: movl %edx, (%eax) -; X86-NEXT: addl $8, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -747,24 +722,22 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: shrdl $28, %edi, %ecx ; X86-NEXT: sarl $28, %edi -; X86-NEXT: cmpl %ecx, %esi -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: cmovbl %esi, %ebx -; X86-NEXT: cmpl %edi, %edx -; X86-NEXT: cmovll %esi, %ecx -; X86-NEXT: cmovel %ebx, %ecx -; X86-NEXT: cmovll %edx, %edi +; X86-NEXT: cmpl %ecx, %edx +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: sbbl %edi, %ebx +; X86-NEXT: cmovll %edx, %ecx +; X86-NEXT: cmovll %esi, %edi ; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: movl %edi, 8(%eax) -; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll index 4763526..d4d18c4 100644 --- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll @@ -182,7 +182,6 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X64-NEXT: cmpq $2, %rdx ; X64-NEXT: movq $-1, %rcx ; X64-NEXT: cmovaeq %rcx, %rax -; X64-NEXT: cmpq $1, %rdx ; X64-NEXT: movl $1, %ecx ; X64-NEXT: cmovbq %rdx, %rcx ; X64-NEXT: shrdq $1, %rcx, %rax @@ -427,20 +426,17 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: cmpl $2, 
%esi ; X86-NEXT: movl $-1, %edx ; X86-NEXT: cmovael %edx, %eax -; X86-NEXT: cmpl $1, %esi ; X86-NEXT: movl $1, %ebp ; X86-NEXT: cmovael %ebp, %esi ; X86-NEXT: shldl $31, %eax, %esi ; X86-NEXT: cmpl $2, %ebx ; X86-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NEXT: cmovael %edx, %eax -; X86-NEXT: cmpl $1, %ebx ; X86-NEXT: cmovael %ebp, %ebx ; X86-NEXT: shldl $31, %eax, %ebx ; X86-NEXT: cmpl $2, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: cmovael %edx, %eax -; X86-NEXT: cmpl $1, %edi ; X86-NEXT: cmovael %ebp, %edi ; X86-NEXT: shldl $31, %eax, %edi ; X86-NEXT: pushl $0 @@ -452,7 +448,6 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: cmpl $2, %edx ; X86-NEXT: movl $-1, %ecx ; X86-NEXT: cmovael %ecx, %eax -; X86-NEXT: cmpl $1, %edx ; X86-NEXT: cmovbl %edx, %ebp ; X86-NEXT: shldl $31, %eax, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax diff --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll index f86f8ea..e6b39eb 100644 --- a/llvm/test/CodeGen/X86/umax.ll +++ b/llvm/test/CodeGen/X86/umax.ll @@ -175,17 +175,15 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: movl %eax, %edi -; X86-NEXT: cmoval %ecx, %edi -; X86-NEXT: cmpl %edx, %esi -; X86-NEXT: cmoval %ecx, %eax -; X86-NEXT: cmovel %edi, %eax -; X86-NEXT: cmoval %esi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: movl %edx, %edi +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: cmovbl %ecx, %eax +; X86-NEXT: cmovbl %esi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl @@ -222,13 +220,12 @@ define i64 @test_i64_1(i64 %a) nounwind { define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X64-LABEL: test_i128: ; X64: # %bb.0: -; X64-NEXT: cmpq %rdx, %rdi ; X64-NEXT: movq %rdx, %rax -; X64-NEXT: cmovaq %rdi, %rax -; X64-NEXT: cmpq %rcx, %rsi -; X64-NEXT: cmovaq %rdi, %rdx -; X64-NEXT: cmovneq %rdx, %rax -; X64-NEXT: cmovaq %rsi, %rcx +; X64-NEXT: cmpq %rdi, %rdx +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: sbbq %rsi, %rdx +; X64-NEXT: cmovbq %rdi, %rax +; X64-NEXT: cmovbq %rsi, %rcx ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: retq ; @@ -238,52 +235,28 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl %ecx, %esi +; X86-NEXT: cmpl %ebx, %edx +; X86-NEXT: movl %esi, %ebp +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %edi, %ebp +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cmoval %esi, %eax -; X86-NEXT: cmpl %edx, %edi -; X86-NEXT: movl %ecx, %ebp -; X86-NEXT: cmoval %esi, %ebp -; X86-NEXT: cmovel %eax, %ebp -; X86-NEXT: movl %edx, %eax -; X86-NEXT: cmoval %edi, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: 
sbbl %ebp, %eax +; X86-NEXT: cmovbl %ebx, %edx +; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edi +; X86-NEXT: cmovbl %ebp, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edi -; X86-NEXT: sbbl %ebx, %edi -; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: xorl %eax, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: xorl %esi, %ebx -; X86-NEXT: orl %edi, %ebx -; X86-NEXT: cmovel %ebp, %ecx -; X86-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: cmpl %esi, %ebx -; X86-NEXT: movl %esi, %edi -; X86-NEXT: cmoval %ebx, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmpl %eax, %ebp -; X86-NEXT: cmoval %ebx, %esi -; X86-NEXT: cmovel %edi, %esi -; X86-NEXT: cmoval %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %eax, 12(%edi) -; X86-NEXT: movl %esi, 8(%edi) -; X86-NEXT: movl %edx, 4(%edi) -; X86-NEXT: movl %ecx, (%edi) -; X86-NEXT: movl %edi, %eax -; X86-NEXT: addl $4, %esp +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl %edi, 8(%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %edx, (%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -395,31 +368,27 @@ define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: cmpl %edx, %eax -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: cmoval %eax, %ebp -; X86-NEXT: cmpl %edi, %ebx -; X86-NEXT: cmoval %eax, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmovel %ebp, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: cmpl %edi, %edx +; X86-NEXT: movl %esi, %ebp +; X86-NEXT: sbbl %ebx, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmoval %ebx, %edi -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: cmoval %eax, %ebx -; X86-NEXT: cmpl %esi, %ebp -; X86-NEXT: cmoval %eax, %ecx -; X86-NEXT: cmovel %ebx, %ecx -; X86-NEXT: cmoval %ebp, %esi +; X86-NEXT: cmovbl %edi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: cmovbl %ebx, %esi +; X86-NEXT: cmpl %ecx, %ebp +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: sbbl %eax, %ebx +; X86-NEXT: cmovbl %ecx, %ebp +; X86-NEXT: cmovbl %eax, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %esi, 12(%eax) -; X86-NEXT: movl %ecx, 8(%eax) -; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %ebp, 8(%eax) +; X86-NEXT: movl %esi, 4(%eax) ; X86-NEXT: movl %edx, (%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -1347,24 +1316,22 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: shrdl $28, %edi, %ecx ; X86-NEXT: sarl $28, %edi -; X86-NEXT: cmpl %ecx, %esi -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: cmoval %esi, %ebx -; X86-NEXT: cmpl %edi, %edx -; X86-NEXT: cmoval %esi, %ecx -; X86-NEXT: 
cmovel %ebx, %ecx -; X86-NEXT: cmoval %edx, %edi +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: sbbl %esi, %ebx +; X86-NEXT: cmovbl %edx, %ecx +; X86-NEXT: cmovbl %esi, %edi ; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: movl %edi, 8(%eax) -; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/umin.ll b/llvm/test/CodeGen/X86/umin.ll index 29d3e0d..e1538aa 100644 --- a/llvm/test/CodeGen/X86/umin.ll +++ b/llvm/test/CodeGen/X86/umin.ll @@ -121,11 +121,9 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: movl %eax, %edi -; X86-NEXT: cmovbl %ecx, %edi -; X86-NEXT: cmpl %edx, %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: sbbl %edx, %edi ; X86-NEXT: cmovbl %ecx, %eax -; X86-NEXT: cmovel %edi, %eax ; X86-NEXT: cmovbl %esi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -137,12 +135,11 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind { define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X64-LABEL: test_i128: ; X64: # %bb.0: -; X64-NEXT: cmpq %rdx, %rdi ; X64-NEXT: movq %rdx, %rax +; X64-NEXT: cmpq %rdx, %rdi +; X64-NEXT: movq %rsi, %rdx +; X64-NEXT: sbbq %rcx, %rdx ; X64-NEXT: cmovbq %rdi, %rax -; X64-NEXT: cmpq %rcx, %rsi -; X64-NEXT: cmovbq %rdi, %rdx -; X64-NEXT: cmovneq %rdx, %rax ; X64-NEXT: cmovbq %rsi, %rcx ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: retq @@ -153,51 +150,29 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $8, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmpl %edx, %edi -; X86-NEXT: movl %edx, %eax -; X86-NEXT: cmovbl %edi, %eax -; X86-NEXT: cmpl %esi, %ebp -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: cmovbl %edi, %ebx -; X86-NEXT: cmovel %eax, %ebx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: cmovbl %ebp, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl %ecx, %edi -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cmovbl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl %edx, %ebx +; X86-NEXT: sbbl %esi, %ebp ; X86-NEXT: movl %eax, %ebp -; X86-NEXT: sbbl %edi, %ebp +; X86-NEXT: sbbl %ecx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: sbbl %ebp, %eax +; X86-NEXT: cmovbl %ebx, %edx ; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: xorl %edi, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: orl %ebp, %eax -; X86-NEXT: cmovel %ebx, %edx -; X86-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl %edi, %eax ; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovbl %eax, %edi +; X86-NEXT: cmovbl %edi, %ebp ; X86-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %ebp, 12(%eax) ; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: movl %esi, 4(%eax) ; X86-NEXT: movl %edx, (%eax) -; X86-NEXT: addl $8, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -756,24 +731,22 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: shrdl $28, %edi, %ecx ; X86-NEXT: sarl $28, %edi -; X86-NEXT: cmpl %ecx, %esi -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: cmovbl %esi, %ebx -; X86-NEXT: cmpl %edi, %edx -; X86-NEXT: cmovbl %esi, %ecx -; X86-NEXT: cmovel %ebx, %ecx -; X86-NEXT: cmovbl %edx, %edi +; X86-NEXT: cmpl %ecx, %edx +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: sbbl %edi, %ebx +; X86-NEXT: cmovbl %edx, %ecx +; X86-NEXT: cmovbl %esi, %edi ; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: movl %edi, 8(%eax) -; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -- 2.7.4
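
P.S. Illustrative sketch, not part of the patch: the regenerated X86 checks above for test_i64 and test_i128 all converge on one shape — a single wide compare built from cmpl on the low halves plus sbbl on the high halves, with the resulting flags driving one cmov per half — instead of the old separate per-half compares with a tie-breaking cmovel. The following C++ models that shape for 64-bit signed max; the function and variable names are hypothetical, chosen here only to mirror the cmpl/sbbl/cmovll sequence in the smax.ll hunks.

#include <cstdint>
#include <cstdio>

// Sketch of the compare-then-select shape the updated checks expect.
// Names are illustrative, not from the patch.
static int64_t smax64_via_halves(int64_t a, int64_t b) {
  uint32_t alo = (uint32_t)a, blo = (uint32_t)b;
  int32_t ahi = (int32_t)(a >> 32), bhi = (int32_t)(b >> 32);

  // One wide signed compare: the low-half compare produces a borrow
  // (cmpl), the high halves are subtracted with that borrow (sbbl),
  // and the sign of the result is exactly "a < b" over all 64 bits.
  uint32_t borrow = alo < blo;
  int64_t hi_diff = (int64_t)ahi - (int64_t)bhi - (int64_t)borrow;
  bool a_lt_b = hi_diff < 0;

  // The same condition then selects each half (one cmovll per half),
  // with no second compare and no equality tie-break.
  uint32_t lo = a_lt_b ? blo : alo;
  uint32_t hi = a_lt_b ? (uint32_t)bhi : (uint32_t)ahi;
  return (int64_t)(((uint64_t)hi << 32) | lo);
}

int main() {
  // Quick sanity check of the sketch against the builtin comparison.
  int64_t vals[] = {0, 1, -1, INT64_MIN, INT64_MAX,
                    1LL << 32, -(1LL << 32)};
  for (int64_t a : vals)
    for (int64_t b : vals)
      if (smax64_via_halves(a, b) != (a > b ? a : b))
        printf("mismatch at %lld, %lld\n", (long long)a, (long long)b);
  return 0;
}

The unsigned variants in umax.ll/umin.ll follow the same pattern with cmovbl in place of cmovll, i.e. the borrow out of the sbbl is consumed directly rather than the sign.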