From bbdf24357932b064f2aa18ea1356b474e0220dde Mon Sep 17 00:00:00 2001
From: Samuel Parker
Date: Mon, 30 Jan 2023 09:36:15 +0000
Subject: [PATCH] [DAGCombine] Fold redundant select

If a chain of two selects shares a true/false value and is controlled
by two setcc nodes that can never both be true, we can fold away one
of the selects.

So, given const0 != const1, the following:

  (select (setcc X, const0, eq), Y,
          (select (setcc X, const1, eq), Z, Y))

can be combined to:

  (select (setcc X, const1, eq), Z, Y)

Differential Revision: https://reviews.llvm.org/D142535
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp      |  67 ++
 llvm/test/CodeGen/ARM/fpclamptosat.ll              | 590 +++++++---------
 llvm/test/CodeGen/ARM/fpclamptosat_vec.ll          | 402 ++++++-----
 llvm/test/CodeGen/RISCV/fpclamptosat.ll            | 230 +++----
 llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll        | 253 +++----
 llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll   | 402 +++++------
 llvm/test/CodeGen/WebAssembly/fpclamptosat.ll      |  45 --
 llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll  |  60 --
 llvm/test/CodeGen/X86/fpclamptosat.ll              |  18 +-
 llvm/test/CodeGen/X86/fpclamptosat_vec.ll          |  51 +-
 llvm/test/CodeGen/X86/sdiv_fix_sat.ll              | 803 ++++++++++------------
 11 files changed, 1288 insertions(+), 1633 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9463b82..b61a2ee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10893,6 +10893,73 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
     }
   }

+  // If we have a chain of two selects which share a true/false value and
+  // are controlled by two setcc nodes that can never both be true, we can
+  // fold away N.
+  // select (setcc X), Y, (select (setcc X), Z, Y) -> select (setcc X), Z, Y
+  auto IsSelect = [](SDValue Op) {
+    return Op->getOpcode() == ISD::SELECT;
+  };
+  if ((IsSelect(N1) || IsSelect(N2)) && (N1.getOpcode() != N2.getOpcode())) {
+    auto AreSame = [](SDValue Op0, SDValue Op1) {
+      if (Op0 == Op1)
+        return true;
+      auto *C0 = dyn_cast<ConstantSDNode>(Op0);
+      auto *C1 = dyn_cast<ConstantSDNode>(Op1);
+      return C0 && C1 &&
+             APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
+    };
+
+    SDValue OtherSelect;
+    bool SelectsShareOp = false;
+    if (IsSelect(N1)) {
+      OtherSelect = N1;
+      SelectsShareOp = AreSame(OtherSelect.getOperand(1), N2);
+    } else {
+      OtherSelect = N2;
+      SelectsShareOp = AreSame(OtherSelect.getOperand(2), N1);
+    }
+
+    auto CanNeverBeEqual = [](SDValue SetCC0, SDValue SetCC1) {
+      if (SetCC0->getOpcode() != ISD::SETCC ||
+          SetCC1->getOpcode() != ISD::SETCC ||
+          SetCC0->getOperand(0) != SetCC1->getOperand(0))
+        return false;
+
+      ISD::CondCode CC0 = cast<CondCodeSDNode>(SetCC0.getOperand(2))->get();
+      ISD::CondCode CC1 = cast<CondCodeSDNode>(SetCC1.getOperand(2))->get();
+      auto *C0 = dyn_cast<ConstantSDNode>(SetCC0.getOperand(1));
+      auto *C1 = dyn_cast<ConstantSDNode>(SetCC1.getOperand(1));
+      if (!C0 || !C1)
+        return false;
+
+      bool ConstantsAreSame =
+          APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
+      auto IsEqual = [](ISD::CondCode CC) {
+        return CC == ISD::SETEQ;
+      };
+      auto IsNotEqual = [](ISD::CondCode CC) {
+        return CC == ISD::SETLT || CC == ISD::SETULT ||
+               CC == ISD::SETGT || CC == ISD::SETUGT ||
+               CC == ISD::SETNE;
+      };
+
+      // The conditions are mutually exclusive if the same constant is tested
+      // with one equality and one non-equality predicate, or if two equality
+      // checks test different constants.
+      if (ConstantsAreSame && IsNotEqual(CC0) && IsEqual(CC1))
+        return true;
+      if (ConstantsAreSame && IsNotEqual(CC1) && IsEqual(CC0))
+        return true;
+      if (!ConstantsAreSame && IsEqual(CC0) && IsEqual(CC1))
+        return true;
+
+      return false;
+    };
+
+    SDValue SetCC0 = N0;
+    SDValue SetCC1 = OtherSelect.getOperand(0);
+    if (SelectsShareOp
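+        // A brief illustration, restating the commit message example: in
+        //   (select (setcc X, C0, eq), Y, (select (setcc X, C1, eq), Z, Y))
+        // with C0 != C1, the inner select already produces Y whenever the
+        // outer condition is true, so the chain as a whole computes the
+        // same value as the inner select alone.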
&& CanNeverBeEqual(SetCC0, SetCC1)) + return OtherSelect; + } + if (TLI.isOperationLegal(ISD::SELECT_CC, VT) || (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) { diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 91c1a21..1f5e305 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -3099,130 +3099,117 @@ define i64 @stest_f64i64_mm(double %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: .pad #20 +; SOFT-NEXT: sub sp, #20 ; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: ldr r5, .LCPI45_0 -; SOFT-NEXT: cmp r1, r5 +; SOFT-NEXT: ldr r0, .LCPI45_0 +; SOFT-NEXT: cmp r1, r0 +; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: blo .LBB45_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: ldr r5, .LCPI45_0 ; SOFT-NEXT: .LBB45_2: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bmi .LBB45_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: ldr r1, .LCPI45_0 ; SOFT-NEXT: .LBB45_4: @ %entry -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: orrs r7, r3 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: orrs r0, r3 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: beq .LBB45_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: .LBB45_6: @ %entry ; SOFT-NEXT: movs r0, #0 -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: mvns r2, r0 -; SOFT-NEXT: cmp r4, r5 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: blo .LBB45_8 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: bmi .LBB45_8 ; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: .LBB45_8: @ %entry -; SOFT-NEXT: cmp r4, r5 -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: bne .LBB45_26 +; SOFT-NEXT: movs r1, #1 +; SOFT-NEXT: lsls r1, r1, #31 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: mov r6, r5 +; SOFT-NEXT: bge .LBB45_10 ; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB45_27 +; SOFT-NEXT: mov r6, r1 ; SOFT-NEXT: .LBB45_10: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB45_12 -; SOFT-NEXT: .LBB45_11: @ %entry -; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: cmp r5, r1 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: bhi .LBB45_12 +; SOFT-NEXT: @ %bb.11: @ %entry +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: .LBB45_12: @ %entry -; SOFT-NEXT: movs r0, #1 -; SOFT-NEXT: lsls r5, r0, #31 -; SOFT-NEXT: cmp r1, r5 -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: bhi .LBB45_14 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: asrs r0, r3, #31 +; SOFT-NEXT: ands r0, r2 +; SOFT-NEXT: ands r0, r7 +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: beq .LBB45_14 ; SOFT-NEXT: @ %bb.13: @ %entry -; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: .LBB45_14: @ %entry -; SOFT-NEXT: cmp r1, r5 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB45_16 +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: ldr r2, .LCPI45_0 +; SOFT-NEXT: cmp r4, r2 +; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: blo .LBB45_16 ; SOFT-NEXT: @ %bb.15: @ 
%entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: .LBB45_16: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: bpl .LBB45_28 +; SOFT-NEXT: cmp r4, r2 +; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: beq .LBB45_18 ; SOFT-NEXT: @ %bb.17: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: blt .LBB45_29 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB45_18: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: beq .LBB45_20 -; SOFT-NEXT: .LBB45_19: -; SOFT-NEXT: asrs r3, r3, #31 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: ands r3, r2 +; SOFT-NEXT: bmi .LBB45_20 +; SOFT-NEXT: @ %bb.19: @ %entry +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: .LBB45_20: @ %entry -; SOFT-NEXT: ands r3, r7 -; SOFT-NEXT: adds r2, r3, #1 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: beq .LBB45_22 ; SOFT-NEXT: @ %bb.21: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: .LBB45_22: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: mov r3, r1 -; SOFT-NEXT: blt .LBB45_30 +; SOFT-NEXT: cmp r5, r1 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bhi .LBB45_24 ; SOFT-NEXT: @ %bb.23: @ %entry -; SOFT-NEXT: cmp r1, r5 -; SOFT-NEXT: bls .LBB45_31 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: .LBB45_24: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB45_32 -; SOFT-NEXT: .LBB45_25: @ %entry -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: cmp r5, r1 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: beq .LBB45_26 +; SOFT-NEXT: @ %bb.25: @ %entry +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: .LBB45_26: @ %entry -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB45_10 -; SOFT-NEXT: .LBB45_27: @ %entry -; SOFT-NEXT: mov r6, r2 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB45_11 -; SOFT-NEXT: b .LBB45_12 +; SOFT-NEXT: bge .LBB45_28 +; SOFT-NEXT: @ %bb.27: @ %entry +; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: .LBB45_28: @ %entry -; SOFT-NEXT: mov r7, r6 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bge .LBB45_18 -; SOFT-NEXT: .LBB45_29: @ %entry -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB45_19 -; SOFT-NEXT: b .LBB45_20 +; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload +; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: beq .LBB45_30 +; SOFT-NEXT: @ %bb.29: @ %entry +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB45_30: @ %entry -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: cmp r1, r5 -; SOFT-NEXT: bhi .LBB45_24 -; SOFT-NEXT: .LBB45_31: @ %entry -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB45_25 -; SOFT-NEXT: .LBB45_32: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: add sp, #20 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.33: +; SOFT-NEXT: @ %bb.31: ; SOFT-NEXT: .LCPI45_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -3244,46 +3231,43 @@ define i64 @stest_f64i64_mm(double %x) { ; VFP2-NEXT: mov.w r5, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r4, r12 -; VFP2-NEXT: orrs.w r9, r2, r3 +; VFP2-NEXT: orrs.w r7, r2, r3 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r4, r1 ; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r5, r3 -; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: mov.w r7, #-2147483648 +; VFP2-NEXT: and.w r2, r2, r3, asr #31 ; VFP2-NEXT: mov.w r1, #-2147483648 +; VFP2-NEXT: cmp.w r5, #-1 +; VFP2-NEXT: mov.w r6, #-2147483648 +; VFP2-NEXT: and.w r2, r2, r5 ; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r7, r4 
+; VFP2-NEXT: movgt r6, r4 ; VFP2-NEXT: cmp.w r4, #-2147483648 -; VFP2-NEXT: mov r6, r3 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r1, r4 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: andne.w r6, r2, r6, asr #31 -; VFP2-NEXT: and.w r2, r6, r5 -; VFP2-NEXT: mov.w r6, #-1 ; VFP2-NEXT: adds r2, #1 ; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r7 -; VFP2-NEXT: mov.w r7, #-1 +; VFP2-NEXT: movne r1, r6 +; VFP2-NEXT: mov.w r6, #-1 ; VFP2-NEXT: cmp r12, r8 ; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r7, r0 -; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: movlo r6, r0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r7, r0 +; VFP2-NEXT: moveq r6, r0 ; VFP2-NEXT: cmp r3, #0 +; VFP2-NEXT: mov.w r9, #-1 ; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r0, r6 -; VFP2-NEXT: cmp.w r9, #0 +; VFP2-NEXT: movpl r0, r9 +; VFP2-NEXT: cmp r7, #0 ; VFP2-NEXT: mov.w r3, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r7 +; VFP2-NEXT: moveq r0, r6 ; VFP2-NEXT: cmp.w r4, #-2147483648 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r3, r0 +; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r3, r0 ; VFP2-NEXT: cmp.w r5, #-1 @@ -3297,10 +3281,8 @@ define i64 @stest_f64i64_mm(double %x) { ; ; FULL-LABEL: stest_f64i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: .pad #4 -; FULL-NEXT: sub sp, #4 +; FULL-NEXT: .save {r4, r5, r6, r7, r8, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; FULL-NEXT: bl __fixdfti ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: cmp r1, r12 @@ -3313,17 +3295,14 @@ define i64 @stest_f64i64_mm(double %x) { ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: mov.w r7, #-2147483648 ; FULL-NEXT: csel r6, r3, lr, mi -; FULL-NEXT: mov r5, r3 +; FULL-NEXT: and.w r2, r2, r3, asr #31 ; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: csel r9, r4, r7, gt +; FULL-NEXT: and.w r2, r2, r6 +; FULL-NEXT: csel r5, r4, r7, gt ; FULL-NEXT: cmp.w r4, #-2147483648 ; FULL-NEXT: csel r7, r4, r7, hi -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it ne -; FULL-NEXT: andne.w r5, r2, r5, asr #31 -; FULL-NEXT: and.w r2, r5, r6 -; FULL-NEXT: adds r5, r2, #1 -; FULL-NEXT: csel r2, r7, r9, eq +; FULL-NEXT: adds r2, #1 +; FULL-NEXT: csel r5, r7, r5, eq ; FULL-NEXT: mov.w r7, #-1 ; FULL-NEXT: cmp r1, r12 ; FULL-NEXT: csel r1, r0, r7, lo @@ -3337,11 +3316,10 @@ define i64 @stest_f64i64_mm(double %x) { ; FULL-NEXT: csel r1, r0, r1, eq ; FULL-NEXT: cmp.w r6, #-1 ; FULL-NEXT: csel r0, r0, lr, gt -; FULL-NEXT: cmp r5, #0 +; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: mov r1, r2 -; FULL-NEXT: add sp, #4 -; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; FULL-NEXT: mov r1, r5 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3693,7 +3671,6 @@ define i64 @stest_f32i64_mm(float %x) { ; SOFT-NEXT: bl __fixsfti ; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: ldr r0, .LCPI48_0 ; SOFT-NEXT: cmp r1, r0 ; SOFT-NEXT: mov r5, r1 @@ -3701,114 +3678,105 @@ define i64 @stest_f32i64_mm(float %x) { ; SOFT-NEXT: @ %bb.1: @ %entry ; SOFT-NEXT: ldr r5, .LCPI48_0 ; SOFT-NEXT: .LBB48_2: @ %entry -; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: cmp r3, #0 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bmi .LBB48_4 ; SOFT-NEXT: @ %bb.3: @ %entry ; SOFT-NEXT: ldr r1, .LCPI48_0 ; SOFT-NEXT: .LBB48_4: @ %entry -; SOFT-NEXT: str r2, [sp] @ 4-byte Spill ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: orrs r0, r7 +; SOFT-NEXT: orrs r0, r3 ; 
SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: beq .LBB48_6 ; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: .LBB48_6: @ %entry -; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: movs r0, #0 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: bmi .LBB48_8 ; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: .LBB48_8: @ %entry ; SOFT-NEXT: movs r1, #1 ; SOFT-NEXT: lsls r1, r1, #31 -; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: cmp r7, #0 ; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: bge .LBB48_10 ; SOFT-NEXT: @ %bb.9: @ %entry ; SOFT-NEXT: mov r6, r1 ; SOFT-NEXT: .LBB48_10: @ %entry ; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bhi .LBB48_12 ; SOFT-NEXT: @ %bb.11: @ %entry -; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: .LBB48_12: @ %entry -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB48_14 -; SOFT-NEXT: @ %bb.13: @ %entry -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: b .LBB48_15 -; SOFT-NEXT: .LBB48_14: -; SOFT-NEXT: asrs r3, r7, #31 -; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload -; SOFT-NEXT: ands r3, r0 -; SOFT-NEXT: .LBB48_15: @ %entry -; SOFT-NEXT: ands r3, r2 -; SOFT-NEXT: adds r0, r3, #1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: asrs r0, r3, #31 +; SOFT-NEXT: ands r0, r2 +; SOFT-NEXT: ands r0, r7 +; SOFT-NEXT: adds r0, r0, #1 ; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: beq .LBB48_17 -; SOFT-NEXT: @ %bb.16: @ %entry +; SOFT-NEXT: beq .LBB48_14 +; SOFT-NEXT: @ %bb.13: @ %entry ; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: .LBB48_17: @ %entry -; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: mvns r6, r3 -; SOFT-NEXT: ldr r0, .LCPI48_0 -; SOFT-NEXT: cmp r4, r0 -; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: blo .LBB48_19 -; SOFT-NEXT: @ %bb.18: @ %entry -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: .LBB48_19: @ %entry -; SOFT-NEXT: cmp r4, r0 +; SOFT-NEXT: .LBB48_14: @ %entry +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: ldr r2, .LCPI48_0 +; SOFT-NEXT: cmp r4, r2 +; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: blo .LBB48_16 +; SOFT-NEXT: @ %bb.15: @ %entry +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: .LBB48_16: @ %entry +; SOFT-NEXT: cmp r4, r2 ; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: beq .LBB48_21 -; SOFT-NEXT: @ %bb.20: @ %entry -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: .LBB48_21: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bmi .LBB48_23 -; SOFT-NEXT: @ %bb.22: @ %entry -; SOFT-NEXT: str r6, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: .LBB48_23: @ %entry +; SOFT-NEXT: beq .LBB48_18 +; SOFT-NEXT: @ %bb.17: @ %entry +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: .LBB48_18: @ %entry +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bmi .LBB48_20 +; SOFT-NEXT: @ %bb.19: @ %entry +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: .LBB48_20: @ %entry ; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB48_25 -; SOFT-NEXT: @ %bb.24: @ %entry +; SOFT-NEXT: beq .LBB48_22 +; SOFT-NEXT: @ %bb.21: @ %entry ; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: .LBB48_25: @ %entry +; SOFT-NEXT: .LBB48_22: @ %entry ; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bhi .LBB48_27 -; SOFT-NEXT: @ 
%bb.26: @ %entry -; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB48_27: @ %entry +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bhi .LBB48_24 +; SOFT-NEXT: @ %bb.23: @ %entry +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: .LBB48_24: @ %entry ; SOFT-NEXT: cmp r5, r1 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB48_29 -; SOFT-NEXT: @ %bb.28: @ %entry -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: .LBB48_29: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bge .LBB48_31 -; SOFT-NEXT: @ %bb.30: @ %entry +; SOFT-NEXT: beq .LBB48_26 +; SOFT-NEXT: @ %bb.25: @ %entry +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: .LBB48_26: @ %entry +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bge .LBB48_28 +; SOFT-NEXT: @ %bb.27: @ %entry ; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB48_31: @ %entry +; SOFT-NEXT: .LBB48_28: @ %entry ; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: beq .LBB48_33 -; SOFT-NEXT: @ %bb.32: @ %entry +; SOFT-NEXT: beq .LBB48_30 +; SOFT-NEXT: @ %bb.29: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB48_33: @ %entry +; SOFT-NEXT: .LBB48_30: @ %entry +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; SOFT-NEXT: add sp, #20 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.34: +; SOFT-NEXT: @ %bb.31: ; SOFT-NEXT: .LCPI48_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -3830,46 +3798,43 @@ define i64 @stest_f32i64_mm(float %x) { ; VFP2-NEXT: mov.w r5, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r4, r12 -; VFP2-NEXT: orrs.w r9, r2, r3 +; VFP2-NEXT: orrs.w r7, r2, r3 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r4, r1 ; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r5, r3 -; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: mov.w r7, #-2147483648 +; VFP2-NEXT: and.w r2, r2, r3, asr #31 ; VFP2-NEXT: mov.w r1, #-2147483648 +; VFP2-NEXT: cmp.w r5, #-1 +; VFP2-NEXT: mov.w r6, #-2147483648 +; VFP2-NEXT: and.w r2, r2, r5 ; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r7, r4 +; VFP2-NEXT: movgt r6, r4 ; VFP2-NEXT: cmp.w r4, #-2147483648 -; VFP2-NEXT: mov r6, r3 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r1, r4 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: andne.w r6, r2, r6, asr #31 -; VFP2-NEXT: and.w r2, r6, r5 -; VFP2-NEXT: mov.w r6, #-1 ; VFP2-NEXT: adds r2, #1 ; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r7 -; VFP2-NEXT: mov.w r7, #-1 +; VFP2-NEXT: movne r1, r6 +; VFP2-NEXT: mov.w r6, #-1 ; VFP2-NEXT: cmp r12, r8 ; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r7, r0 -; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: movlo r6, r0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r7, r0 +; VFP2-NEXT: moveq r6, r0 ; VFP2-NEXT: cmp r3, #0 +; VFP2-NEXT: mov.w r9, #-1 ; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r0, r6 -; VFP2-NEXT: cmp.w r9, #0 +; VFP2-NEXT: movpl r0, r9 +; VFP2-NEXT: cmp r7, #0 ; VFP2-NEXT: mov.w r3, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r7 +; VFP2-NEXT: moveq r0, r6 ; VFP2-NEXT: cmp.w r4, #-2147483648 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r3, r0 +; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r3, r0 ; VFP2-NEXT: cmp.w r5, #-1 @@ -3883,10 +3848,8 @@ define i64 @stest_f32i64_mm(float %x) { ; ; FULL-LABEL: stest_f32i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: .pad #4 -; FULL-NEXT: sub sp, #4 +; FULL-NEXT: .save {r4, r5, r6, r7, r8, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; FULL-NEXT: bl __fixsfti ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: cmp r1, r12 @@ 
-3899,17 +3862,14 @@ define i64 @stest_f32i64_mm(float %x) { ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: mov.w r7, #-2147483648 ; FULL-NEXT: csel r6, r3, lr, mi -; FULL-NEXT: mov r5, r3 +; FULL-NEXT: and.w r2, r2, r3, asr #31 ; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: csel r9, r4, r7, gt +; FULL-NEXT: and.w r2, r2, r6 +; FULL-NEXT: csel r5, r4, r7, gt ; FULL-NEXT: cmp.w r4, #-2147483648 ; FULL-NEXT: csel r7, r4, r7, hi -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it ne -; FULL-NEXT: andne.w r5, r2, r5, asr #31 -; FULL-NEXT: and.w r2, r5, r6 -; FULL-NEXT: adds r5, r2, #1 -; FULL-NEXT: csel r2, r7, r9, eq +; FULL-NEXT: adds r2, #1 +; FULL-NEXT: csel r5, r7, r5, eq ; FULL-NEXT: mov.w r7, #-1 ; FULL-NEXT: cmp r1, r12 ; FULL-NEXT: csel r1, r0, r7, lo @@ -3923,11 +3883,10 @@ define i64 @stest_f32i64_mm(float %x) { ; FULL-NEXT: csel r1, r0, r1, eq ; FULL-NEXT: cmp.w r6, #-1 ; FULL-NEXT: csel r0, r0, lr, gt -; FULL-NEXT: cmp r5, #0 +; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: mov r1, r2 -; FULL-NEXT: add sp, #4 -; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; FULL-NEXT: mov r1, r5 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi float %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -4275,7 +4234,6 @@ define i64 @stest_f16i64_mm(half %x) { ; SOFT-NEXT: bl __fixsfti ; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: ldr r0, .LCPI51_0 ; SOFT-NEXT: cmp r1, r0 ; SOFT-NEXT: mov r5, r1 @@ -4283,114 +4241,105 @@ define i64 @stest_f16i64_mm(half %x) { ; SOFT-NEXT: @ %bb.1: @ %entry ; SOFT-NEXT: ldr r5, .LCPI51_0 ; SOFT-NEXT: .LBB51_2: @ %entry -; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: cmp r3, #0 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bmi .LBB51_4 ; SOFT-NEXT: @ %bb.3: @ %entry ; SOFT-NEXT: ldr r1, .LCPI51_0 ; SOFT-NEXT: .LBB51_4: @ %entry -; SOFT-NEXT: str r2, [sp] @ 4-byte Spill ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: orrs r0, r7 +; SOFT-NEXT: orrs r0, r3 ; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: beq .LBB51_6 ; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: .LBB51_6: @ %entry -; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: movs r0, #0 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: bmi .LBB51_8 ; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: .LBB51_8: @ %entry ; SOFT-NEXT: movs r1, #1 ; SOFT-NEXT: lsls r1, r1, #31 -; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: cmp r7, #0 ; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: bge .LBB51_10 ; SOFT-NEXT: @ %bb.9: @ %entry ; SOFT-NEXT: mov r6, r1 ; SOFT-NEXT: .LBB51_10: @ %entry ; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bhi .LBB51_12 ; SOFT-NEXT: @ %bb.11: @ %entry -; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: .LBB51_12: @ %entry -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB51_14 -; SOFT-NEXT: @ %bb.13: @ %entry -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: b .LBB51_15 -; SOFT-NEXT: .LBB51_14: -; SOFT-NEXT: asrs r3, r7, #31 -; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload -; SOFT-NEXT: ands r3, r0 -; SOFT-NEXT: .LBB51_15: @ %entry -; SOFT-NEXT: ands r3, r2 -; SOFT-NEXT: adds r0, r3, #1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: asrs r0, r3, #31 +; SOFT-NEXT: ands r0, 
r2 +; SOFT-NEXT: ands r0, r7 +; SOFT-NEXT: adds r0, r0, #1 ; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: beq .LBB51_17 -; SOFT-NEXT: @ %bb.16: @ %entry +; SOFT-NEXT: beq .LBB51_14 +; SOFT-NEXT: @ %bb.13: @ %entry ; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: .LBB51_17: @ %entry -; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: mvns r6, r3 -; SOFT-NEXT: ldr r0, .LCPI51_0 -; SOFT-NEXT: cmp r4, r0 -; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: blo .LBB51_19 -; SOFT-NEXT: @ %bb.18: @ %entry -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: .LBB51_19: @ %entry -; SOFT-NEXT: cmp r4, r0 +; SOFT-NEXT: .LBB51_14: @ %entry +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: ldr r2, .LCPI51_0 +; SOFT-NEXT: cmp r4, r2 +; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: blo .LBB51_16 +; SOFT-NEXT: @ %bb.15: @ %entry +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: .LBB51_16: @ %entry +; SOFT-NEXT: cmp r4, r2 ; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: beq .LBB51_21 -; SOFT-NEXT: @ %bb.20: @ %entry -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: .LBB51_21: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bmi .LBB51_23 -; SOFT-NEXT: @ %bb.22: @ %entry -; SOFT-NEXT: str r6, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: .LBB51_23: @ %entry +; SOFT-NEXT: beq .LBB51_18 +; SOFT-NEXT: @ %bb.17: @ %entry +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: .LBB51_18: @ %entry +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bmi .LBB51_20 +; SOFT-NEXT: @ %bb.19: @ %entry +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: .LBB51_20: @ %entry ; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB51_25 -; SOFT-NEXT: @ %bb.24: @ %entry +; SOFT-NEXT: beq .LBB51_22 +; SOFT-NEXT: @ %bb.21: @ %entry ; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: .LBB51_25: @ %entry +; SOFT-NEXT: .LBB51_22: @ %entry ; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bhi .LBB51_27 -; SOFT-NEXT: @ %bb.26: @ %entry -; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB51_27: @ %entry +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bhi .LBB51_24 +; SOFT-NEXT: @ %bb.23: @ %entry +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: .LBB51_24: @ %entry ; SOFT-NEXT: cmp r5, r1 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB51_29 -; SOFT-NEXT: @ %bb.28: @ %entry -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: .LBB51_29: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bge .LBB51_31 -; SOFT-NEXT: @ %bb.30: @ %entry +; SOFT-NEXT: beq .LBB51_26 +; SOFT-NEXT: @ %bb.25: @ %entry +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: .LBB51_26: @ %entry +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bge .LBB51_28 +; SOFT-NEXT: @ %bb.27: @ %entry ; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB51_31: @ %entry +; SOFT-NEXT: .LBB51_28: @ %entry ; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: beq .LBB51_33 -; SOFT-NEXT: @ %bb.32: @ %entry +; SOFT-NEXT: beq .LBB51_30 +; SOFT-NEXT: @ %bb.29: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB51_33: @ %entry +; SOFT-NEXT: .LBB51_30: @ %entry +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; SOFT-NEXT: add sp, #20 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.34: +; SOFT-NEXT: @ %bb.31: ; SOFT-NEXT: .LCPI51_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -4415,46 +4364,43 @@ define i64 @stest_f16i64_mm(half %x) { ; VFP2-NEXT: mov.w r5, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r4, 
r12 -; VFP2-NEXT: orrs.w r9, r2, r3 +; VFP2-NEXT: orrs.w r7, r2, r3 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r4, r1 ; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r5, r3 -; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: mov.w r7, #-2147483648 +; VFP2-NEXT: and.w r2, r2, r3, asr #31 ; VFP2-NEXT: mov.w r1, #-2147483648 +; VFP2-NEXT: cmp.w r5, #-1 +; VFP2-NEXT: mov.w r6, #-2147483648 +; VFP2-NEXT: and.w r2, r2, r5 ; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r7, r4 +; VFP2-NEXT: movgt r6, r4 ; VFP2-NEXT: cmp.w r4, #-2147483648 -; VFP2-NEXT: mov r6, r3 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r1, r4 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: andne.w r6, r2, r6, asr #31 -; VFP2-NEXT: and.w r2, r6, r5 -; VFP2-NEXT: mov.w r6, #-1 ; VFP2-NEXT: adds r2, #1 ; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r7 -; VFP2-NEXT: mov.w r7, #-1 +; VFP2-NEXT: movne r1, r6 +; VFP2-NEXT: mov.w r6, #-1 ; VFP2-NEXT: cmp r12, r8 ; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r7, r0 -; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: movlo r6, r0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r7, r0 +; VFP2-NEXT: moveq r6, r0 ; VFP2-NEXT: cmp r3, #0 +; VFP2-NEXT: mov.w r9, #-1 ; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r0, r6 -; VFP2-NEXT: cmp.w r9, #0 +; VFP2-NEXT: movpl r0, r9 +; VFP2-NEXT: cmp r7, #0 ; VFP2-NEXT: mov.w r3, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r7 +; VFP2-NEXT: moveq r0, r6 ; VFP2-NEXT: cmp.w r4, #-2147483648 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r3, r0 +; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r3, r0 ; VFP2-NEXT: cmp.w r5, #-1 @@ -4468,10 +4414,8 @@ define i64 @stest_f16i64_mm(half %x) { ; ; FULL-LABEL: stest_f16i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; FULL-NEXT: .pad #4 -; FULL-NEXT: sub sp, #4 +; FULL-NEXT: .save {r4, r5, r6, r7, r8, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti @@ -4486,17 +4430,14 @@ define i64 @stest_f16i64_mm(half %x) { ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: mov.w r7, #-2147483648 ; FULL-NEXT: csel r6, r3, lr, mi -; FULL-NEXT: mov r5, r3 +; FULL-NEXT: and.w r2, r2, r3, asr #31 ; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: csel r9, r4, r7, gt +; FULL-NEXT: and.w r2, r2, r6 +; FULL-NEXT: csel r5, r4, r7, gt ; FULL-NEXT: cmp.w r4, #-2147483648 ; FULL-NEXT: csel r7, r4, r7, hi -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it ne -; FULL-NEXT: andne.w r5, r2, r5, asr #31 -; FULL-NEXT: and.w r2, r5, r6 -; FULL-NEXT: adds r5, r2, #1 -; FULL-NEXT: csel r2, r7, r9, eq +; FULL-NEXT: adds r2, #1 +; FULL-NEXT: csel r5, r7, r5, eq ; FULL-NEXT: mov.w r7, #-1 ; FULL-NEXT: cmp r1, r12 ; FULL-NEXT: csel r1, r0, r7, lo @@ -4510,11 +4451,10 @@ define i64 @stest_f16i64_mm(half %x) { ; FULL-NEXT: csel r1, r0, r1, eq ; FULL-NEXT: cmp.w r6, #-1 ; FULL-NEXT: csel r0, r0, lr, gt -; FULL-NEXT: cmp r5, #0 +; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: mov r1, r2 -; FULL-NEXT: add sp, #4 -; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; FULL-NEXT: mov r1, r5 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index a4d470b..db6f331 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -3671,95 +3671,93 @@ define <2 x i64> @stest_f64i64_mm(<2 x 
double> %x) { ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: cmn r1, #-2147483647 +; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: movlo r0, r1 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: mov r10, #0 -; CHECK-NEXT: andne r0, r2, r0, asr #31 ; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: movmi r10, r3 -; CHECK-NEXT: and r1, r0, r10 -; CHECK-NEXT: cmn r11, #-2147483647 -; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: movlo r0, r11 +; CHECK-NEXT: movmi r5, r1 +; CHECK-NEXT: orrs r1, r2, r3 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: moveq r5, r0 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mvn r8, #-2147483648 +; CHECK-NEXT: and r0, r2, r3, asr #31 +; CHECK-NEXT: movmi r8, r3 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: and r1, r0, r8 ; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: movmi r8, r11 -; CHECK-NEXT: orrs r2, r2, r3 -; CHECK-NEXT: moveq r8, r0 -; CHECK-NEXT: cmn r10, #1 +; CHECK-NEXT: cmn r8, #1 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: mov r9, #-2147483648 -; CHECK-NEXT: movgt r0, r8 -; CHECK-NEXT: cmp r8, #-2147483648 -; CHECK-NEXT: movhi r9, r8 +; CHECK-NEXT: mov r10, #-2147483648 +; CHECK-NEXT: movgt r0, r5 +; CHECK-NEXT: cmp r5, #-2147483648 +; CHECK-NEXT: movhi r10, r5 ; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r9, r3 ; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: movne r9, r0 +; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: movne r10, r0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r5, #0 -; CHECK-NEXT: movlo r5, r0 +; CHECK-NEXT: mvn r6, #0 +; CHECK-NEXT: movlo r6, r0 ; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: moveq r6, r0 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: movpl r0, r4 ; CHECK-NEXT: orrs r12, r2, r3 -; CHECK-NEXT: moveq r0, r5 +; CHECK-NEXT: moveq r0, r6 ; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: movlo r5, r1 +; CHECK-NEXT: mvn r6, #-2147483648 +; CHECK-NEXT: and r2, r2, r3, asr #31 +; CHECK-NEXT: movlo r6, r1 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: movmi r7, r1 ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: moveq r7, r5 +; CHECK-NEXT: moveq r7, r6 ; CHECK-NEXT: cmp r7, #-2147483648 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: movhi r1, r0 ; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: moveq r1, r0 -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: movhi r12, r0 ; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: movmi r6, r5 +; CHECK-NEXT: moveq r12, r0 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: movmi r6, r1 ; CHECK-NEXT: cmn r11, #-2147483647 -; CHECK-NEXT: movlo r4, r5 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: movlo r4, r1 +; CHECK-NEXT: moveq r4, r1 +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: movne r4, r6 -; CHECK-NEXT: cmp r8, #-2147483648 +; CHECK-NEXT: cmp r5, #-2147483648 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movhi r6, r4 ; CHECK-NEXT: moveq r6, r4 -; CHECK-NEXT: cmn r10, #1 -; CHECK-NEXT: movle r4, r12 -; CHECK-NEXT: cmn r5, #1 +; CHECK-NEXT: cmn r8, #1 +; CHECK-NEXT: movle r4, r5 +; 
CHECK-NEXT: cmn r1, #1 ; CHECK-NEXT: moveq r4, r6 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: vmov.32 d1[0], r4 ; CHECK-NEXT: movmi r6, r3 ; CHECK-NEXT: cmn r6, #1 -; CHECK-NEXT: movle r0, r12 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: andne r3, r2, r3, asr #31 -; CHECK-NEXT: and r2, r3, r6 +; CHECK-NEXT: and r2, r2, r6 +; CHECK-NEXT: movle r0, r5 ; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: moveq r0, r1 -; CHECK-NEXT: cmn r6, #1 ; CHECK-NEXT: mov r1, #-2147483648 +; CHECK-NEXT: moveq r0, r12 +; CHECK-NEXT: cmn r6, #1 ; CHECK-NEXT: vmov.32 d0[0], r0 ; CHECK-NEXT: movgt r1, r7 ; CHECK-NEXT: cmp r7, #-2147483648 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: vmov.32 d1[1], r9 ; CHECK-NEXT: movls r7, r0 ; CHECK-NEXT: cmn r2, #1 +; CHECK-NEXT: vmov.32 d1[1], r10 ; CHECK-NEXT: movne r7, r1 ; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: add sp, sp, #16 @@ -3947,95 +3945,93 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: mov r10, #0 ; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: andne r0, r2, r0, asr #31 -; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: movmi r10, r3 -; CHECK-NEXT: and r1, r0, r10 -; CHECK-NEXT: cmn r11, #-2147483647 +; CHECK-NEXT: cmn r1, #-2147483647 ; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: mvn r8, #-2147483648 -; CHECK-NEXT: movlo r0, r11 +; CHECK-NEXT: movlo r0, r1 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movmi r8, r11 -; CHECK-NEXT: orrs r2, r2, r3 -; CHECK-NEXT: moveq r8, r0 -; CHECK-NEXT: cmn r10, #1 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: movmi r5, r1 +; CHECK-NEXT: orrs r1, r2, r3 +; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: and r0, r2, r3, asr #31 +; CHECK-NEXT: movmi r8, r3 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: and r1, r0, r8 +; CHECK-NEXT: cmn r8, #1 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: mov r9, #-2147483648 -; CHECK-NEXT: movgt r0, r8 -; CHECK-NEXT: cmp r8, #-2147483648 -; CHECK-NEXT: movhi r9, r8 +; CHECK-NEXT: mov r10, #-2147483648 +; CHECK-NEXT: movgt r0, r5 +; CHECK-NEXT: cmp r5, #-2147483648 +; CHECK-NEXT: movhi r10, r5 ; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r9, r3 ; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: movne r9, r0 +; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: movne r10, r0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r5, #0 -; CHECK-NEXT: movlo r5, r0 +; CHECK-NEXT: mvn r6, #0 +; CHECK-NEXT: movlo r6, r0 ; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: moveq r6, r0 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: movpl r0, r4 ; CHECK-NEXT: orrs r12, r2, r3 -; CHECK-NEXT: moveq r0, r5 +; CHECK-NEXT: moveq r0, r6 ; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: movlo r5, r1 +; CHECK-NEXT: mvn r6, #-2147483648 +; CHECK-NEXT: and r2, r2, r3, asr #31 +; CHECK-NEXT: movlo r6, r1 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: movmi r7, r1 ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: moveq r7, r5 +; CHECK-NEXT: moveq r7, r6 ; CHECK-NEXT: cmp r7, #-2147483648 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: movhi r1, r0 ; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: moveq r1, r0 -; CHECK-NEXT: cmp r6, #0 
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: movhi r12, r0 ; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: movmi r6, r5 +; CHECK-NEXT: moveq r12, r0 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: movmi r6, r1 ; CHECK-NEXT: cmn r11, #-2147483647 -; CHECK-NEXT: movlo r4, r5 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: movlo r4, r1 +; CHECK-NEXT: moveq r4, r1 +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: movne r4, r6 -; CHECK-NEXT: cmp r8, #-2147483648 +; CHECK-NEXT: cmp r5, #-2147483648 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movhi r6, r4 ; CHECK-NEXT: moveq r6, r4 -; CHECK-NEXT: cmn r10, #1 -; CHECK-NEXT: movle r4, r12 -; CHECK-NEXT: cmn r5, #1 +; CHECK-NEXT: cmn r8, #1 +; CHECK-NEXT: movle r4, r5 +; CHECK-NEXT: cmn r1, #1 ; CHECK-NEXT: moveq r4, r6 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: vmov.32 d1[0], r4 ; CHECK-NEXT: movmi r6, r3 ; CHECK-NEXT: cmn r6, #1 -; CHECK-NEXT: movle r0, r12 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: andne r3, r2, r3, asr #31 -; CHECK-NEXT: and r2, r3, r6 +; CHECK-NEXT: and r2, r2, r6 +; CHECK-NEXT: movle r0, r5 ; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: moveq r0, r1 -; CHECK-NEXT: cmn r6, #1 ; CHECK-NEXT: mov r1, #-2147483648 +; CHECK-NEXT: moveq r0, r12 +; CHECK-NEXT: cmn r6, #1 ; CHECK-NEXT: vmov.32 d0[0], r0 ; CHECK-NEXT: movgt r1, r7 ; CHECK-NEXT: cmp r7, #-2147483648 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: vmov.32 d1[1], r9 ; CHECK-NEXT: movls r7, r0 ; CHECK-NEXT: cmn r2, #1 +; CHECK-NEXT: vmov.32 d1[1], r10 ; CHECK-NEXT: movne r7, r1 ; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: add sp, sp, #16 @@ -4224,100 +4220,100 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: mov r0, r3 -; CHECK-NEON-NEXT: mov r10, #0 -; CHECK-NEON-NEXT: andne r0, r2, r0, asr #31 -; CHECK-NEON-NEXT: mov r11, r1 -; CHECK-NEON-NEXT: movmi r10, r3 -; CHECK-NEON-NEXT: and r1, r0, r10 -; CHECK-NEON-NEXT: cmn r11, #-2147483647 +; CHECK-NEON-NEXT: mov r5, r0 +; CHECK-NEON-NEXT: cmn r1, #-2147483647 ; CHECK-NEON-NEXT: mvn r0, #-2147483648 -; CHECK-NEON-NEXT: movlo r0, r11 +; CHECK-NEON-NEXT: mvn r11, #-2147483648 +; CHECK-NEON-NEXT: movlo r0, r1 ; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: mvn r8, #-2147483648 -; CHECK-NEON-NEXT: mov r9, #-2147483648 -; CHECK-NEON-NEXT: movmi r8, r11 -; CHECK-NEON-NEXT: orrs r2, r2, r3 -; CHECK-NEON-NEXT: moveq r8, r0 -; CHECK-NEON-NEXT: cmn r10, #1 +; CHECK-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEON-NEXT: movmi r11, r1 +; CHECK-NEON-NEXT: orrs r1, r2, r3 +; CHECK-NEON-NEXT: mov r8, #0 +; CHECK-NEON-NEXT: moveq r11, r0 +; CHECK-NEON-NEXT: cmp r3, #0 +; CHECK-NEON-NEXT: and r0, r2, r3, asr #31 +; CHECK-NEON-NEXT: movmi r8, r3 +; CHECK-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEON-NEXT: and r1, r0, r8 +; CHECK-NEON-NEXT: cmn r8, #1 ; CHECK-NEON-NEXT: mov r0, #-2147483648 -; CHECK-NEON-NEXT: mov r6, r3 -; CHECK-NEON-NEXT: movgt r0, r8 -; CHECK-NEON-NEXT: cmp r8, #-2147483648 -; CHECK-NEON-NEXT: movhi r9, r8 +; CHECK-NEON-NEXT: movgt r0, r11 +; CHECK-NEON-NEXT: cmp r11, #-2147483648 +; CHECK-NEON-NEXT: mov r2, #-2147483648 +; CHECK-NEON-NEXT: mov r9, r3 +; 
CHECK-NEON-NEXT: movhi r2, r11 ; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: movne r9, r0 +; CHECK-NEON-NEXT: movne r2, r0 ; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEON-NEXT: mvn r7, #-2147483648 +; CHECK-NEON-NEXT: mvn r10, #-2147483648 +; CHECK-NEON-NEXT: str r1, [sp] @ 4-byte Spill ; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti ; CHECK-NEON-NEXT: cmn r1, #-2147483647 -; CHECK-NEON-NEXT: mvn r5, #0 -; CHECK-NEON-NEXT: movlo r5, r0 +; CHECK-NEON-NEXT: mvn r6, #0 +; CHECK-NEON-NEXT: movlo r6, r0 ; CHECK-NEON-NEXT: mvn r4, #0 -; CHECK-NEON-NEXT: moveq r5, r0 +; CHECK-NEON-NEXT: moveq r6, r0 ; CHECK-NEON-NEXT: cmp r3, #0 ; CHECK-NEON-NEXT: movpl r0, r4 ; CHECK-NEON-NEXT: orrs r12, r2, r3 -; CHECK-NEON-NEXT: moveq r0, r5 +; CHECK-NEON-NEXT: moveq r0, r6 ; CHECK-NEON-NEXT: cmn r1, #-2147483647 -; CHECK-NEON-NEXT: mvn r5, #-2147483648 -; CHECK-NEON-NEXT: movlo r5, r1 +; CHECK-NEON-NEXT: mvn r6, #-2147483648 +; CHECK-NEON-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEON-NEXT: movlo r6, r1 ; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: movmi r7, r1 +; CHECK-NEON-NEXT: movmi r10, r1 ; CHECK-NEON-NEXT: cmp r12, #0 -; CHECK-NEON-NEXT: moveq r7, r5 -; CHECK-NEON-NEXT: cmp r7, #-2147483648 +; CHECK-NEON-NEXT: moveq r10, r6 +; CHECK-NEON-NEXT: cmp r10, #-2147483648 ; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEON-NEXT: mvn r6, #0 ; CHECK-NEON-NEXT: movhi r1, r0 -; CHECK-NEON-NEXT: mov r12, #0 +; CHECK-NEON-NEXT: and r2, r2, r3, asr #31 ; CHECK-NEON-NEXT: moveq r1, r0 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: mvn r6, #0 +; CHECK-NEON-NEXT: cmp r9, #0 ; CHECK-NEON-NEXT: movmi r6, r5 -; CHECK-NEON-NEXT: cmn r11, #-2147483647 +; CHECK-NEON-NEXT: cmn r7, #-2147483647 ; CHECK-NEON-NEXT: movlo r4, r5 +; CHECK-NEON-NEXT: ldr r7, [sp] @ 4-byte Reload ; CHECK-NEON-NEXT: moveq r4, r5 -; CHECK-NEON-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; CHECK-NEON-NEXT: ldr r5, [sp, #12] @ 4-byte Reload ; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: movne r4, r6 -; CHECK-NEON-NEXT: cmp r8, #-2147483648 +; CHECK-NEON-NEXT: cmp r11, #-2147483648 ; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: movhi r6, r4 ; CHECK-NEON-NEXT: moveq r6, r4 -; CHECK-NEON-NEXT: cmn r10, #1 -; CHECK-NEON-NEXT: movle r4, r12 -; CHECK-NEON-NEXT: cmn r5, #1 +; CHECK-NEON-NEXT: cmn r8, #1 +; CHECK-NEON-NEXT: movle r4, r5 +; CHECK-NEON-NEXT: cmn r7, #1 ; CHECK-NEON-NEXT: moveq r4, r6 ; CHECK-NEON-NEXT: cmp r3, #0 ; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: vmov.32 d1[0], r4 ; CHECK-NEON-NEXT: movmi r6, r3 ; CHECK-NEON-NEXT: cmn r6, #1 -; CHECK-NEON-NEXT: movle r0, r12 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: andne r3, r2, r3, asr #31 -; CHECK-NEON-NEXT: and r2, r3, r6 +; CHECK-NEON-NEXT: and r2, r2, r6 +; CHECK-NEON-NEXT: movle r0, r5 ; CHECK-NEON-NEXT: cmn r2, #1 ; CHECK-NEON-NEXT: moveq r0, r1 ; CHECK-NEON-NEXT: cmn r6, #1 ; CHECK-NEON-NEXT: mov r1, #-2147483648 ; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: movgt r1, r7 -; CHECK-NEON-NEXT: cmp r7, #-2147483648 +; CHECK-NEON-NEXT: movgt r1, r10 +; CHECK-NEON-NEXT: cmp r10, #-2147483648 ; CHECK-NEON-NEXT: mov r0, #-2147483648 -; CHECK-NEON-NEXT: vmov.32 d1[1], r9 -; CHECK-NEON-NEXT: movls r7, r0 +; CHECK-NEON-NEXT: movls r10, r0 +; CHECK-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte 
Reload ; CHECK-NEON-NEXT: cmn r2, #1 -; CHECK-NEON-NEXT: movne r7, r1 -; CHECK-NEON-NEXT: vmov.32 d0[1], r7 +; CHECK-NEON-NEXT: movne r10, r1 +; CHECK-NEON-NEXT: vmov.32 d1[1], r0 +; CHECK-NEON-NEXT: vmov.32 d0[1], r10 ; CHECK-NEON-NEXT: add sp, sp, #16 ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: add sp, sp, #4 @@ -4338,96 +4334,94 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti ; CHECK-FP16-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-FP16-NEXT: cmn r1, #-2147483647 +; CHECK-FP16-NEXT: mvn r0, #-2147483648 +; CHECK-FP16-NEXT: mvn r5, #-2147483648 +; CHECK-FP16-NEXT: movlo r0, r1 ; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: mov r0, r3 -; CHECK-FP16-NEXT: mov r10, #0 -; CHECK-FP16-NEXT: andne r0, r2, r0, asr #31 ; CHECK-FP16-NEXT: mov r11, r1 -; CHECK-FP16-NEXT: movmi r10, r3 -; CHECK-FP16-NEXT: and r1, r0, r10 -; CHECK-FP16-NEXT: cmn r11, #-2147483647 -; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: movlo r0, r11 +; CHECK-FP16-NEXT: movmi r5, r1 +; CHECK-FP16-NEXT: orrs r1, r2, r3 +; CHECK-FP16-NEXT: mov r8, #0 +; CHECK-FP16-NEXT: moveq r5, r0 ; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: mvn r8, #-2147483648 -; CHECK-FP16-NEXT: mov r9, #-2147483648 -; CHECK-FP16-NEXT: movmi r8, r11 -; CHECK-FP16-NEXT: orrs r2, r2, r3 -; CHECK-FP16-NEXT: moveq r8, r0 -; CHECK-FP16-NEXT: cmn r10, #1 +; CHECK-FP16-NEXT: and r0, r2, r3, asr #31 +; CHECK-FP16-NEXT: movmi r8, r3 +; CHECK-FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-FP16-NEXT: and r1, r0, r8 +; CHECK-FP16-NEXT: cmn r8, #1 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: mov r6, r3 -; CHECK-FP16-NEXT: movgt r0, r8 -; CHECK-FP16-NEXT: cmp r8, #-2147483648 -; CHECK-FP16-NEXT: movhi r9, r8 +; CHECK-FP16-NEXT: movgt r0, r5 +; CHECK-FP16-NEXT: cmp r5, #-2147483648 +; CHECK-FP16-NEXT: mov r10, #-2147483648 +; CHECK-FP16-NEXT: mov r9, r3 +; CHECK-FP16-NEXT: movhi r10, r5 ; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: movne r9, r0 +; CHECK-FP16-NEXT: movne r10, r0 ; CHECK-FP16-NEXT: vmov.u16 r0, d8[0] -; CHECK-FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill ; CHECK-FP16-NEXT: mvn r7, #-2147483648 -; CHECK-FP16-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-FP16-NEXT: str r1, [sp, #4] @ 4-byte Spill ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti ; CHECK-FP16-NEXT: cmn r1, #-2147483647 -; CHECK-FP16-NEXT: mvn r5, #0 -; CHECK-FP16-NEXT: movlo r5, r0 +; CHECK-FP16-NEXT: mvn r6, #0 +; CHECK-FP16-NEXT: movlo r6, r0 ; CHECK-FP16-NEXT: mvn r4, #0 -; CHECK-FP16-NEXT: moveq r5, r0 +; CHECK-FP16-NEXT: moveq r6, r0 ; CHECK-FP16-NEXT: cmp r3, #0 ; CHECK-FP16-NEXT: movpl r0, r4 ; CHECK-FP16-NEXT: orrs r12, r2, r3 -; CHECK-FP16-NEXT: moveq r0, r5 +; CHECK-FP16-NEXT: moveq r0, r6 ; CHECK-FP16-NEXT: cmn r1, #-2147483647 -; CHECK-FP16-NEXT: mvn r5, #-2147483648 -; CHECK-FP16-NEXT: movlo r5, r1 +; CHECK-FP16-NEXT: mvn r6, #-2147483648 +; CHECK-FP16-NEXT: and r2, r2, r3, asr #31 +; CHECK-FP16-NEXT: movlo r6, r1 ; CHECK-FP16-NEXT: cmp r3, #0 ; CHECK-FP16-NEXT: movmi r7, r1 ; CHECK-FP16-NEXT: cmp r12, #0 -; CHECK-FP16-NEXT: moveq r7, r5 +; CHECK-FP16-NEXT: moveq r7, r6 ; CHECK-FP16-NEXT: cmp r7, #-2147483648 -; CHECK-FP16-NEXT: mov r1, #0 -; CHECK-FP16-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-FP16-NEXT: movhi r1, r0 ; CHECK-FP16-NEXT: mov r12, #0 -; CHECK-FP16-NEXT: moveq r1, r0 -; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-FP16-NEXT: movhi r12, r0 ; CHECK-FP16-NEXT: mvn r6, #0 -; 
CHECK-FP16-NEXT: movmi r6, r5 +; CHECK-FP16-NEXT: moveq r12, r0 +; CHECK-FP16-NEXT: cmp r9, #0 +; CHECK-FP16-NEXT: movmi r6, r1 ; CHECK-FP16-NEXT: cmn r11, #-2147483647 -; CHECK-FP16-NEXT: movlo r4, r5 -; CHECK-FP16-NEXT: moveq r4, r5 -; CHECK-FP16-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-FP16-NEXT: movlo r4, r1 +; CHECK-FP16-NEXT: moveq r4, r1 +; CHECK-FP16-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-FP16-NEXT: movne r4, r6 -; CHECK-FP16-NEXT: cmp r8, #-2147483648 +; CHECK-FP16-NEXT: cmp r5, #-2147483648 ; CHECK-FP16-NEXT: mov r6, #0 +; CHECK-FP16-NEXT: mov r5, #0 ; CHECK-FP16-NEXT: movhi r6, r4 ; CHECK-FP16-NEXT: moveq r6, r4 -; CHECK-FP16-NEXT: cmn r10, #1 -; CHECK-FP16-NEXT: movle r4, r12 -; CHECK-FP16-NEXT: cmn r5, #1 +; CHECK-FP16-NEXT: cmn r8, #1 +; CHECK-FP16-NEXT: movle r4, r5 +; CHECK-FP16-NEXT: cmn r1, #1 ; CHECK-FP16-NEXT: moveq r4, r6 ; CHECK-FP16-NEXT: cmp r3, #0 ; CHECK-FP16-NEXT: mov r6, #0 ; CHECK-FP16-NEXT: vmov.32 d1[0], r4 ; CHECK-FP16-NEXT: movmi r6, r3 ; CHECK-FP16-NEXT: cmn r6, #1 -; CHECK-FP16-NEXT: movle r0, r12 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: andne r3, r2, r3, asr #31 -; CHECK-FP16-NEXT: and r2, r3, r6 +; CHECK-FP16-NEXT: and r2, r2, r6 +; CHECK-FP16-NEXT: movle r0, r5 ; CHECK-FP16-NEXT: cmn r2, #1 -; CHECK-FP16-NEXT: moveq r0, r1 -; CHECK-FP16-NEXT: cmn r6, #1 ; CHECK-FP16-NEXT: mov r1, #-2147483648 +; CHECK-FP16-NEXT: moveq r0, r12 +; CHECK-FP16-NEXT: cmn r6, #1 ; CHECK-FP16-NEXT: vmov.32 d0[0], r0 ; CHECK-FP16-NEXT: movgt r1, r7 ; CHECK-FP16-NEXT: cmp r7, #-2147483648 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: vmov.32 d1[1], r9 ; CHECK-FP16-NEXT: movls r7, r0 ; CHECK-FP16-NEXT: cmn r2, #1 +; CHECK-FP16-NEXT: vmov.32 d1[1], r10 ; CHECK-FP16-NEXT: movne r7, r1 ; CHECK-FP16-NEXT: vmov.32 d0[1], r7 ; CHECK-FP16-NEXT: add sp, sp, #16 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 7eb7e14..a7d4241 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -2972,50 +2972,47 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt -; RV32IF-NEXT: lw a0, 20(sp) +; RV32IF-NEXT: lw a1, 20(sp) ; RV32IF-NEXT: lw t0, 8(sp) ; RV32IF-NEXT: lw a4, 12(sp) -; RV32IF-NEXT: lw a1, 16(sp) +; RV32IF-NEXT: lw a0, 16(sp) ; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: addi a6, a3, -1 +; RV32IF-NEXT: addi a5, a3, -1 ; RV32IF-NEXT: mv a2, t0 -; RV32IF-NEXT: beq a4, a6, .LBB45_2 +; RV32IF-NEXT: beq a4, a5, .LBB45_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: sltu a2, a4, a6 +; RV32IF-NEXT: sltu a2, a4, a5 ; RV32IF-NEXT: addi a2, a2, -1 ; RV32IF-NEXT: or a2, a2, t0 ; RV32IF-NEXT: .LBB45_2: # %entry -; RV32IF-NEXT: or a7, a1, a0 -; RV32IF-NEXT: slti a5, a0, 0 +; RV32IF-NEXT: or a7, a0, a1 +; RV32IF-NEXT: slti a6, a1, 0 ; RV32IF-NEXT: bnez a7, .LBB45_16 ; RV32IF-NEXT: # %bb.3: # %entry ; RV32IF-NEXT: mv t0, a4 -; RV32IF-NEXT: bgez a0, .LBB45_17 +; RV32IF-NEXT: bgez a1, .LBB45_17 ; RV32IF-NEXT: .LBB45_4: # %entry -; RV32IF-NEXT: bgeu a4, a6, .LBB45_18 +; RV32IF-NEXT: bgeu a4, a5, .LBB45_18 ; RV32IF-NEXT: .LBB45_5: # %entry ; RV32IF-NEXT: beqz a7, .LBB45_7 ; RV32IF-NEXT: .LBB45_6: # %entry ; RV32IF-NEXT: mv a4, t0 ; RV32IF-NEXT: .LBB45_7: # %entry -; RV32IF-NEXT: srai a6, a0, 31 -; RV32IF-NEXT: and a1, a6, a1 
-; RV32IF-NEXT: seqz a6, a0 -; RV32IF-NEXT: neg a5, a5 -; RV32IF-NEXT: and a5, a5, a0 -; RV32IF-NEXT: addi a6, a6, -1 -; RV32IF-NEXT: mv a0, a4 +; RV32IF-NEXT: neg a5, a6 +; RV32IF-NEXT: and a5, a5, a1 +; RV32IF-NEXT: srai a1, a1, 31 +; RV32IF-NEXT: mv t0, a4 ; RV32IF-NEXT: bgez a5, .LBB45_9 ; RV32IF-NEXT: # %bb.8: # %entry -; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: lui t0, 524288 ; RV32IF-NEXT: .LBB45_9: # %entry -; RV32IF-NEXT: and a6, a6, a1 +; RV32IF-NEXT: and a0, a1, a0 ; RV32IF-NEXT: mv a1, a4 ; RV32IF-NEXT: bltu a3, a4, .LBB45_11 ; RV32IF-NEXT: # %bb.10: # %entry ; RV32IF-NEXT: lui a1, 524288 ; RV32IF-NEXT: .LBB45_11: # %entry -; RV32IF-NEXT: and a6, a6, a5 +; RV32IF-NEXT: and a6, a0, a5 ; RV32IF-NEXT: li a7, -1 ; RV32IF-NEXT: bne a6, a7, .LBB45_19 ; RV32IF-NEXT: # %bb.12: # %entry @@ -3032,19 +3029,19 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret ; RV32IF-NEXT: .LBB45_16: # %entry -; RV32IF-NEXT: addi a2, a5, -1 +; RV32IF-NEXT: addi a2, a6, -1 ; RV32IF-NEXT: or a2, a2, t0 ; RV32IF-NEXT: mv t0, a4 -; RV32IF-NEXT: bltz a0, .LBB45_4 +; RV32IF-NEXT: bltz a1, .LBB45_4 ; RV32IF-NEXT: .LBB45_17: # %entry -; RV32IF-NEXT: mv t0, a6 -; RV32IF-NEXT: bltu a4, a6, .LBB45_5 +; RV32IF-NEXT: mv t0, a5 +; RV32IF-NEXT: bltu a4, a5, .LBB45_5 ; RV32IF-NEXT: .LBB45_18: # %entry -; RV32IF-NEXT: mv a4, a6 +; RV32IF-NEXT: mv a4, a5 ; RV32IF-NEXT: bnez a7, .LBB45_6 ; RV32IF-NEXT: j .LBB45_7 ; RV32IF-NEXT: .LBB45_19: # %entry -; RV32IF-NEXT: mv a1, a0 +; RV32IF-NEXT: mv a1, t0 ; RV32IF-NEXT: mv a0, a2 ; RV32IF-NEXT: beq a4, a3, .LBB45_13 ; RV32IF-NEXT: .LBB45_20: # %entry @@ -3111,50 +3108,47 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt -; RV32IFD-NEXT: lw a0, 20(sp) +; RV32IFD-NEXT: lw a1, 20(sp) ; RV32IFD-NEXT: lw t0, 8(sp) ; RV32IFD-NEXT: lw a4, 12(sp) -; RV32IFD-NEXT: lw a1, 16(sp) +; RV32IFD-NEXT: lw a0, 16(sp) ; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: addi a6, a3, -1 +; RV32IFD-NEXT: addi a5, a3, -1 ; RV32IFD-NEXT: mv a2, t0 -; RV32IFD-NEXT: beq a4, a6, .LBB45_2 +; RV32IFD-NEXT: beq a4, a5, .LBB45_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: sltu a2, a4, a6 +; RV32IFD-NEXT: sltu a2, a4, a5 ; RV32IFD-NEXT: addi a2, a2, -1 ; RV32IFD-NEXT: or a2, a2, t0 ; RV32IFD-NEXT: .LBB45_2: # %entry -; RV32IFD-NEXT: or a7, a1, a0 -; RV32IFD-NEXT: slti a5, a0, 0 +; RV32IFD-NEXT: or a7, a0, a1 +; RV32IFD-NEXT: slti a6, a1, 0 ; RV32IFD-NEXT: bnez a7, .LBB45_16 ; RV32IFD-NEXT: # %bb.3: # %entry ; RV32IFD-NEXT: mv t0, a4 -; RV32IFD-NEXT: bgez a0, .LBB45_17 +; RV32IFD-NEXT: bgez a1, .LBB45_17 ; RV32IFD-NEXT: .LBB45_4: # %entry -; RV32IFD-NEXT: bgeu a4, a6, .LBB45_18 +; RV32IFD-NEXT: bgeu a4, a5, .LBB45_18 ; RV32IFD-NEXT: .LBB45_5: # %entry ; RV32IFD-NEXT: beqz a7, .LBB45_7 ; RV32IFD-NEXT: .LBB45_6: # %entry ; RV32IFD-NEXT: mv a4, t0 ; RV32IFD-NEXT: .LBB45_7: # %entry -; RV32IFD-NEXT: srai a6, a0, 31 -; RV32IFD-NEXT: and a1, a6, a1 -; RV32IFD-NEXT: seqz a6, a0 -; RV32IFD-NEXT: neg a5, a5 -; RV32IFD-NEXT: and a5, a5, a0 -; RV32IFD-NEXT: addi a6, a6, -1 -; RV32IFD-NEXT: mv a0, a4 +; RV32IFD-NEXT: neg a5, a6 +; RV32IFD-NEXT: and a5, a5, a1 +; RV32IFD-NEXT: srai a1, a1, 31 +; RV32IFD-NEXT: mv t0, a4 ; RV32IFD-NEXT: bgez a5, .LBB45_9 ; RV32IFD-NEXT: # %bb.8: # %entry -; RV32IFD-NEXT: lui a0, 524288 +; RV32IFD-NEXT: lui t0, 524288 ; RV32IFD-NEXT: .LBB45_9: # %entry -; RV32IFD-NEXT: and a6, a6, a1 +; RV32IFD-NEXT: and a0, a1, a0 ; RV32IFD-NEXT: mv a1, a4 ; 
 ; RV32IFD-NEXT: bltu a3, a4, .LBB45_11
 ; RV32IFD-NEXT: # %bb.10: # %entry
 ; RV32IFD-NEXT: lui a1, 524288
 ; RV32IFD-NEXT: .LBB45_11: # %entry
-; RV32IFD-NEXT: and a6, a6, a5
+; RV32IFD-NEXT: and a6, a0, a5
 ; RV32IFD-NEXT: li a7, -1
 ; RV32IFD-NEXT: bne a6, a7, .LBB45_19
 ; RV32IFD-NEXT: # %bb.12: # %entry
@@ -3171,19 +3165,19 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT: addi sp, sp, 32
 ; RV32IFD-NEXT: ret
 ; RV32IFD-NEXT: .LBB45_16: # %entry
-; RV32IFD-NEXT: addi a2, a5, -1
+; RV32IFD-NEXT: addi a2, a6, -1
 ; RV32IFD-NEXT: or a2, a2, t0
 ; RV32IFD-NEXT: mv t0, a4
-; RV32IFD-NEXT: bltz a0, .LBB45_4
+; RV32IFD-NEXT: bltz a1, .LBB45_4
 ; RV32IFD-NEXT: .LBB45_17: # %entry
-; RV32IFD-NEXT: mv t0, a6
-; RV32IFD-NEXT: bltu a4, a6, .LBB45_5
+; RV32IFD-NEXT: mv t0, a5
+; RV32IFD-NEXT: bltu a4, a5, .LBB45_5
 ; RV32IFD-NEXT: .LBB45_18: # %entry
-; RV32IFD-NEXT: mv a4, a6
+; RV32IFD-NEXT: mv a4, a5
 ; RV32IFD-NEXT: bnez a7, .LBB45_6
 ; RV32IFD-NEXT: j .LBB45_7
 ; RV32IFD-NEXT: .LBB45_19: # %entry
-; RV32IFD-NEXT: mv a1, a0
+; RV32IFD-NEXT: mv a1, t0
 ; RV32IFD-NEXT: mv a0, a2
 ; RV32IFD-NEXT: beq a4, a3, .LBB45_13
 ; RV32IFD-NEXT: .LBB45_20: # %entry
@@ -3246,11 +3240,7 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
 ; RV64-NEXT: call __fixunsdfti@plt
-; RV64-NEXT: snez a2, a1
-; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: and a0, a2, a0
-; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: snez a1, a1
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: and a0, a1, a0
 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -3374,12 +3364,8 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV64-NEXT: # %bb.1: # %entry
 ; RV64-NEXT: li a2, 1
 ; RV64-NEXT: .LBB47_2: # %entry
-; RV64-NEXT: slti a3, a1, 1
-; RV64-NEXT: neg a3, a3
-; RV64-NEXT: and a0, a3, a0
-; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: seqz a1, a1
-; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: slti a1, a1, 1
+; RV64-NEXT: neg a1, a1
 ; RV64-NEXT: and a0, a1, a0
 ; RV64-NEXT: beqz a2, .LBB47_4
 ; RV64-NEXT: # %bb.3: # %entry
@@ -3476,50 +3462,47 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT: .cfi_offset ra, -4
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 20(sp)
 ; RV32-NEXT: lw t0, 8(sp)
 ; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a1, 16(sp)
+; RV32-NEXT: lw a0, 16(sp)
 ; RV32-NEXT: lui a3, 524288
-; RV32-NEXT: addi a6, a3, -1
+; RV32-NEXT: addi a5, a3, -1
 ; RV32-NEXT: mv a2, t0
-; RV32-NEXT: beq a4, a6, .LBB48_2
+; RV32-NEXT: beq a4, a5, .LBB48_2
 ; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: sltu a2, a4, a6
+; RV32-NEXT: sltu a2, a4, a5
 ; RV32-NEXT: addi a2, a2, -1
 ; RV32-NEXT: or a2, a2, t0
 ; RV32-NEXT: .LBB48_2: # %entry
-; RV32-NEXT: or a7, a1, a0
-; RV32-NEXT: slti a5, a0, 0
+; RV32-NEXT: or a7, a0, a1
+; RV32-NEXT: slti a6, a1, 0
 ; RV32-NEXT: bnez a7, .LBB48_16
 ; RV32-NEXT: # %bb.3: # %entry
 ; RV32-NEXT: mv t0, a4
-; RV32-NEXT: bgez a0, .LBB48_17
+; RV32-NEXT: bgez a1, .LBB48_17
 ; RV32-NEXT: .LBB48_4: # %entry
-; RV32-NEXT: bgeu a4, a6, .LBB48_18
+; RV32-NEXT: bgeu a4, a5, .LBB48_18
 ; RV32-NEXT: .LBB48_5: # %entry
 ; RV32-NEXT: beqz a7, .LBB48_7
 ; RV32-NEXT: .LBB48_6: # %entry
 ; RV32-NEXT: mv a4, t0
 ; RV32-NEXT: .LBB48_7: # %entry
-; RV32-NEXT: srai a6, a0, 31
-; RV32-NEXT: and a1, a6, a1
-; RV32-NEXT: seqz a6, a0
-; RV32-NEXT: neg a5, a5
-; RV32-NEXT: and a5, a5, a0
-; RV32-NEXT: addi a6, a6, -1
-; RV32-NEXT: mv a0, a4
+; RV32-NEXT: neg a5, a6
+; RV32-NEXT: and a5, a5, a1
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: mv t0, a4
 ; RV32-NEXT: bgez a5, .LBB48_9
 ; RV32-NEXT: # %bb.8: # %entry
-; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: lui t0, 524288
 ; RV32-NEXT: .LBB48_9: # %entry
-; RV32-NEXT: and a6, a6, a1
+; RV32-NEXT: and a0, a1, a0
 ; RV32-NEXT: mv a1, a4
 ; RV32-NEXT: bltu a3, a4, .LBB48_11
 ; RV32-NEXT: # %bb.10: # %entry
 ; RV32-NEXT: lui a1, 524288
 ; RV32-NEXT: .LBB48_11: # %entry
-; RV32-NEXT: and a6, a6, a5
+; RV32-NEXT: and a6, a0, a5
 ; RV32-NEXT: li a7, -1
 ; RV32-NEXT: bne a6, a7, .LBB48_19
 ; RV32-NEXT: # %bb.12: # %entry
@@ -3536,19 +3519,19 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ; RV32-NEXT: .LBB48_16: # %entry
-; RV32-NEXT: addi a2, a5, -1
+; RV32-NEXT: addi a2, a6, -1
 ; RV32-NEXT: or a2, a2, t0
 ; RV32-NEXT: mv t0, a4
-; RV32-NEXT: bltz a0, .LBB48_4
+; RV32-NEXT: bltz a1, .LBB48_4
 ; RV32-NEXT: .LBB48_17: # %entry
-; RV32-NEXT: mv t0, a6
-; RV32-NEXT: bltu a4, a6, .LBB48_5
+; RV32-NEXT: mv t0, a5
+; RV32-NEXT: bltu a4, a5, .LBB48_5
 ; RV32-NEXT: .LBB48_18: # %entry
-; RV32-NEXT: mv a4, a6
+; RV32-NEXT: mv a4, a5
 ; RV32-NEXT: bnez a7, .LBB48_6
 ; RV32-NEXT: j .LBB48_7
 ; RV32-NEXT: .LBB48_19: # %entry
-; RV32-NEXT: mv a1, a0
+; RV32-NEXT: mv a1, t0
 ; RV32-NEXT: mv a0, a2
 ; RV32-NEXT: beq a4, a3, .LBB48_13
 ; RV32-NEXT: .LBB48_20: # %entry
@@ -3609,11 +3592,7 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
 ; RV64-NEXT: call __fixunssfti@plt
-; RV64-NEXT: snez a2, a1
-; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: and a0, a2, a0
-; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: snez a1, a1
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: and a0, a1, a0
 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -3708,12 +3687,8 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; RV64-NEXT: # %bb.1: # %entry
 ; RV64-NEXT: li a2, 1
 ; RV64-NEXT: .LBB50_2: # %entry
-; RV64-NEXT: slti a3, a1, 1
-; RV64-NEXT: neg a3, a3
-; RV64-NEXT: and a0, a3, a0
-; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: seqz a1, a1
-; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: slti a1, a1, 1
+; RV64-NEXT: neg a1, a1
 ; RV64-NEXT: and a0, a1, a0
 ; RV64-NEXT: beqz a2, .LBB50_4
 ; RV64-NEXT: # %bb.3: # %entry
@@ -3743,50 +3718,47 @@ define i64 @stest_f16i64_mm(half %x) {
 ; RV32-NEXT: call __extendhfsf2@plt
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti@plt
-; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 20(sp)
 ; RV32-NEXT: lw t0, 8(sp)
 ; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a1, 16(sp)
+; RV32-NEXT: lw a0, 16(sp)
 ; RV32-NEXT: lui a3, 524288
-; RV32-NEXT: addi a6, a3, -1
+; RV32-NEXT: addi a5, a3, -1
 ; RV32-NEXT: mv a2, t0
-; RV32-NEXT: beq a4, a6, .LBB51_2
+; RV32-NEXT: beq a4, a5, .LBB51_2
 ; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: sltu a2, a4, a6
+; RV32-NEXT: sltu a2, a4, a5
 ; RV32-NEXT: addi a2, a2, -1
 ; RV32-NEXT: or a2, a2, t0
 ; RV32-NEXT: .LBB51_2: # %entry
-; RV32-NEXT: or a7, a1, a0
-; RV32-NEXT: slti a5, a0, 0
+; RV32-NEXT: or a7, a0, a1
+; RV32-NEXT: slti a6, a1, 0
 ; RV32-NEXT: bnez a7, .LBB51_16
 ; RV32-NEXT: # %bb.3: # %entry
 ; RV32-NEXT: mv t0, a4
-; RV32-NEXT: bgez a0, .LBB51_17
+; RV32-NEXT: bgez a1, .LBB51_17
 ; RV32-NEXT: .LBB51_4: # %entry
-; RV32-NEXT: bgeu a4, a6, .LBB51_18
+; RV32-NEXT: bgeu a4, a5, .LBB51_18
 ; RV32-NEXT: .LBB51_5: # %entry
 ; RV32-NEXT: beqz a7, .LBB51_7
 ; RV32-NEXT: .LBB51_6: # %entry
 ; RV32-NEXT: mv a4, t0
 ; RV32-NEXT: .LBB51_7: # %entry
-; RV32-NEXT: srai a6, a0, 31
-; RV32-NEXT: and a1, a6, a1
-; RV32-NEXT: seqz a6, a0
-; RV32-NEXT: neg a5, a5
-; RV32-NEXT: and a5, a5, a0
-; RV32-NEXT: addi a6, a6, -1
-; RV32-NEXT: mv a0, a4
+; RV32-NEXT: neg a5, a6
+; RV32-NEXT: and a5, a5, a1
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: mv t0, a4
 ; RV32-NEXT: bgez a5, .LBB51_9
 ; RV32-NEXT: # %bb.8: # %entry
-; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: lui t0, 524288
 ; RV32-NEXT: .LBB51_9: # %entry
-; RV32-NEXT: and a6, a6, a1
+; RV32-NEXT: and a0, a1, a0
 ; RV32-NEXT: mv a1, a4
 ; RV32-NEXT: bltu a3, a4, .LBB51_11
 ; RV32-NEXT: # %bb.10: # %entry
 ; RV32-NEXT: lui a1, 524288
 ; RV32-NEXT: .LBB51_11: # %entry
-; RV32-NEXT: and a6, a6, a5
+; RV32-NEXT: and a6, a0, a5
 ; RV32-NEXT: li a7, -1
 ; RV32-NEXT: bne a6, a7, .LBB51_19
 ; RV32-NEXT: # %bb.12: # %entry
@@ -3803,19 +3775,19 @@ define i64 @stest_f16i64_mm(half %x) {
 ; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ; RV32-NEXT: .LBB51_16: # %entry
-; RV32-NEXT: addi a2, a5, -1
+; RV32-NEXT: addi a2, a6, -1
 ; RV32-NEXT: or a2, a2, t0
 ; RV32-NEXT: mv t0, a4
-; RV32-NEXT: bltz a0, .LBB51_4
+; RV32-NEXT: bltz a1, .LBB51_4
 ; RV32-NEXT: .LBB51_17: # %entry
-; RV32-NEXT: mv t0, a6
-; RV32-NEXT: bltu a4, a6, .LBB51_5
+; RV32-NEXT: mv t0, a5
+; RV32-NEXT: bltu a4, a5, .LBB51_5
 ; RV32-NEXT: .LBB51_18: # %entry
-; RV32-NEXT: mv a4, a6
+; RV32-NEXT: mv a4, a5
 ; RV32-NEXT: bnez a7, .LBB51_6
 ; RV32-NEXT: j .LBB51_7
 ; RV32-NEXT: .LBB51_19: # %entry
-; RV32-NEXT: mv a1, a0
+; RV32-NEXT: mv a1, t0
 ; RV32-NEXT: mv a0, a2
 ; RV32-NEXT: beq a4, a3, .LBB51_13
 ; RV32-NEXT: .LBB51_20: # %entry
@@ -3922,11 +3894,7 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
 ; RV64-NEXT: call __fixunssfti@plt
-; RV64-NEXT: snez a2, a1
-; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: and a0, a2, a0
-; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: seqz a1, a1
+; RV64-NEXT: snez a1, a1
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: and a0, a1, a0
 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -4025,12 +3993,8 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; RV64-NEXT: # %bb.1: # %entry
 ; RV64-NEXT: li a2, 1
 ; RV64-NEXT: .LBB53_2: # %entry
-; RV64-NEXT: slti a3, a1, 1
-; RV64-NEXT: neg a3, a3
-; RV64-NEXT: and a0, a3, a0
-; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: seqz a1, a1
-; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: slti a1, a1, 1
+; RV64-NEXT: neg a1, a1
 ; RV64-NEXT: and a0, a1, a0
 ; RV64-NEXT: beqz a2, .LBB53_4
 ; RV64-NEXT: # %bb.3: # %entry
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
index 77faf67..de9a54d 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
@@ -5572,20 +5572,12 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NOV-NEXT: mv s1, a1
 ; CHECK-NOV-NEXT: fmv.d fa0, fs0
 ; CHECK-NOV-NEXT: call __fixunsdfti@plt
-; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a0, a2, a0
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: snez a1, a1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: and a0, a1, a0
 ; CHECK-NOV-NEXT: snez a1, s1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: and a1, a1, s0
-; CHECK-NOV-NEXT: addi s1, s1, -1
-; CHECK-NOV-NEXT: seqz a2, s1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a1, a2, a1
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -5623,15 +5615,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-V-NEXT: snez a2, s1
 ; CHECK-V-NEXT: addi a2, a2, -1
 ; CHECK-V-NEXT: and a2, a2, s0
-; CHECK-V-NEXT: addi s1, s1, -1
-; CHECK-V-NEXT: seqz a3, s1
-; CHECK-V-NEXT: addi a3, a3, -1
-; CHECK-V-NEXT: and a2, a3, a2
-; CHECK-V-NEXT: snez a3, a1
-; CHECK-V-NEXT: addi a3, a3, -1
-; CHECK-V-NEXT: and a0, a3, a0
-; CHECK-V-NEXT: addi a1, a1, -1
-; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: snez a1, a1
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: and a0, a1, a0
 ; CHECK-V-NEXT: sd a0, 24(sp)
@@ -5677,42 +5661,35 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NOV-NEXT: mv s1, a1
 ; CHECK-NOV-NEXT: fmv.d fa0, fs0
 ; CHECK-NOV-NEXT: call __fixdfti@plt
-; CHECK-NOV-NEXT: mv a2, a1
+; CHECK-NOV-NEXT: mv a3, a1
 ; CHECK-NOV-NEXT: blez a1, .LBB47_2
 ; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: li a2, 1
+; CHECK-NOV-NEXT: li a3, 1
 ; CHECK-NOV-NEXT: .LBB47_2: # %entry
 ; CHECK-NOV-NEXT: mv a4, s1
 ; CHECK-NOV-NEXT: blez s1, .LBB47_4
 ; CHECK-NOV-NEXT: # %bb.3: # %entry
 ; CHECK-NOV-NEXT: li a4, 1
 ; CHECK-NOV-NEXT: .LBB47_4: # %entry
-; CHECK-NOV-NEXT: slti a3, a1, 1
-; CHECK-NOV-NEXT: neg a3, a3
-; CHECK-NOV-NEXT: and a3, a3, a0
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: seqz a1, a1
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: slti a0, s1, 1
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a0, a0, s0
-; CHECK-NOV-NEXT: addi s1, s1, -1
-; CHECK-NOV-NEXT: seqz a5, s1
-; CHECK-NOV-NEXT: addi a5, a5, -1
-; CHECK-NOV-NEXT: and a0, a5, a0
+; CHECK-NOV-NEXT: slti a1, a1, 1
+; CHECK-NOV-NEXT: neg a1, a1
+; CHECK-NOV-NEXT: slti a2, s1, 1
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a2, a2, s0
 ; CHECK-NOV-NEXT: beqz a4, .LBB47_6
 ; CHECK-NOV-NEXT: # %bb.5: # %entry
 ; CHECK-NOV-NEXT: sgtz a4, a4
 ; CHECK-NOV-NEXT: neg a4, a4
-; CHECK-NOV-NEXT: and a0, a4, a0
+; CHECK-NOV-NEXT: and a2, a4, a2
 ; CHECK-NOV-NEXT: .LBB47_6: # %entry
-; CHECK-NOV-NEXT: and a1, a1, a3
-; CHECK-NOV-NEXT: beqz a2, .LBB47_8
+; CHECK-NOV-NEXT: and a1, a1, a0
+; CHECK-NOV-NEXT: beqz a3, .LBB47_8
 ; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: sgtz a2, a2
-; CHECK-NOV-NEXT: neg a2, a2
-; CHECK-NOV-NEXT: and a1, a2, a1
+; CHECK-NOV-NEXT: sgtz a0, a3
+; CHECK-NOV-NEXT: neg a0, a0
+; CHECK-NOV-NEXT: and a1, a0, a1
 ; CHECK-NOV-NEXT: .LBB47_8: # %entry
+; CHECK-NOV-NEXT: mv a0, a2
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -5753,37 +5730,29 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-V-NEXT: li a2, 1
 ; CHECK-V-NEXT: .LBB47_2: # %entry
 ; CHECK-V-NEXT: slti a3, s0, 1
-; CHECK-V-NEXT: neg a3, a3
-; CHECK-V-NEXT: and a3, a3, s1
-; CHECK-V-NEXT: addi a4, s0, -1
-; CHECK-V-NEXT: seqz a4, a4
-; CHECK-V-NEXT: addi a4, a4, -1
-; CHECK-V-NEXT: slti a5, a1, 1
-; CHECK-V-NEXT: neg a5, a5
-; CHECK-V-NEXT: addi a1, a1, -1
-; CHECK-V-NEXT: seqz a6, a1
+; CHECK-V-NEXT: neg a4, a3
+; CHECK-V-NEXT: slti a1, a1, 1
 ; CHECK-V-NEXT: blez s0, .LBB47_4
 ; CHECK-V-NEXT: # %bb.3: # %entry
 ; CHECK-V-NEXT: li s0, 1
 ; CHECK-V-NEXT: .LBB47_4: # %entry
-; CHECK-V-NEXT: and a1, a5, a0
-; CHECK-V-NEXT: addi a5, a6, -1
-; CHECK-V-NEXT: and a0, a4, a3
+; CHECK-V-NEXT: neg a3, a1
+; CHECK-V-NEXT: and a1, a4, s1
 ; CHECK-V-NEXT: beqz s0, .LBB47_6
 ; CHECK-V-NEXT: # %bb.5: # %entry
-; CHECK-V-NEXT: sgtz a3, s0
-; CHECK-V-NEXT: neg a3, a3
-; CHECK-V-NEXT: and a0, a3, a0
+; CHECK-V-NEXT: sgtz a4, s0
+; CHECK-V-NEXT: neg a4, a4
+; CHECK-V-NEXT: and a1, a4, a1
 ; CHECK-V-NEXT: .LBB47_6: # %entry
-; CHECK-V-NEXT: and a1, a5, a1
+; CHECK-V-NEXT: and a0, a3, a0
 ; CHECK-V-NEXT: beqz a2, .LBB47_8
 ; CHECK-V-NEXT: # %bb.7: # %entry
 ; CHECK-V-NEXT: sgtz a2, a2
 ; CHECK-V-NEXT: neg a2, a2
-; CHECK-V-NEXT: and a1, a2, a1
+; CHECK-V-NEXT: and a0, a2, a0
 ; CHECK-V-NEXT: .LBB47_8: # %entry
-; CHECK-V-NEXT: sd a1, 24(sp)
-; CHECK-V-NEXT: sd a0, 32(sp)
+; CHECK-V-NEXT: sd a0, 24(sp)
+; CHECK-V-NEXT: sd a1, 32(sp)
 ; CHECK-V-NEXT: addi a0, sp, 24
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT: vle64.v v8, (a0)
@@ -6062,20 +6031,12 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NOV-NEXT: mv s1, a1
 ; CHECK-NOV-NEXT: fmv.s fa0, fs0
 ; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a0, a2, a0
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: snez a1, a1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: and a0, a1, a0
 ; CHECK-NOV-NEXT: snez a1, s1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: and a1, a1, s0
-; CHECK-NOV-NEXT: addi s1, s1, -1
-; CHECK-NOV-NEXT: seqz a2, s1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a1, a2, a1
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -6113,15 +6074,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-V-NEXT: snez a2, s1
 ; CHECK-V-NEXT: addi a2, a2, -1
 ; CHECK-V-NEXT: and a2, a2, s0
-; CHECK-V-NEXT: addi s1, s1, -1
-; CHECK-V-NEXT: seqz a3, s1
-; CHECK-V-NEXT: addi a3, a3, -1
-; CHECK-V-NEXT: and a2, a3, a2
-; CHECK-V-NEXT: snez a3, a1
-; CHECK-V-NEXT: addi a3, a3, -1
-; CHECK-V-NEXT: and a0, a3, a0
-; CHECK-V-NEXT: addi a1, a1, -1
-; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: snez a1, a1
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: and a0, a1, a0
 ; CHECK-V-NEXT: sd a0, 24(sp)
@@ -6167,42 +6120,35 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NOV-NEXT: mv s1, a1
 ; CHECK-NOV-NEXT: fmv.s fa0, fs0
 ; CHECK-NOV-NEXT: call __fixsfti@plt
-; CHECK-NOV-NEXT: mv a2, a1
+; CHECK-NOV-NEXT: mv a3, a1
 ; CHECK-NOV-NEXT: blez a1, .LBB50_2
 ; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: li a2, 1
+; CHECK-NOV-NEXT: li a3, 1
 ; CHECK-NOV-NEXT: .LBB50_2: # %entry
 ; CHECK-NOV-NEXT: mv a4, s1
 ; CHECK-NOV-NEXT: blez s1, .LBB50_4
 ; CHECK-NOV-NEXT: # %bb.3: # %entry
 ; CHECK-NOV-NEXT: li a4, 1
 ; CHECK-NOV-NEXT: .LBB50_4: # %entry
-; CHECK-NOV-NEXT: slti a3, a1, 1
-; CHECK-NOV-NEXT: neg a3, a3
-; CHECK-NOV-NEXT: and a3, a3, a0
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: seqz a1, a1
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: slti a0, s1, 1
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a0, a0, s0
-; CHECK-NOV-NEXT: addi s1, s1, -1
-; CHECK-NOV-NEXT: seqz a5, s1
-; CHECK-NOV-NEXT: addi a5, a5, -1
-; CHECK-NOV-NEXT: and a0, a5, a0
+; CHECK-NOV-NEXT: slti a1, a1, 1
+; CHECK-NOV-NEXT: neg a1, a1
+; CHECK-NOV-NEXT: slti a2, s1, 1
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a2, a2, s0
 ; CHECK-NOV-NEXT: beqz a4, .LBB50_6
 ; CHECK-NOV-NEXT: # %bb.5: # %entry
 ; CHECK-NOV-NEXT: sgtz a4, a4
 ; CHECK-NOV-NEXT: neg a4, a4
-; CHECK-NOV-NEXT: and a0, a4, a0
+; CHECK-NOV-NEXT: and a2, a4, a2
 ; CHECK-NOV-NEXT: .LBB50_6: # %entry
-; CHECK-NOV-NEXT: and a1, a1, a3
-; CHECK-NOV-NEXT: beqz a2, .LBB50_8
+; CHECK-NOV-NEXT: and a1, a1, a0
+; CHECK-NOV-NEXT: beqz a3, .LBB50_8
 ; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: sgtz a2, a2
-; CHECK-NOV-NEXT: neg a2, a2
-; CHECK-NOV-NEXT: and a1, a2, a1
+; CHECK-NOV-NEXT: sgtz a0, a3
+; CHECK-NOV-NEXT: neg a0, a0
+; CHECK-NOV-NEXT: and a1, a0, a1
 ; CHECK-NOV-NEXT: .LBB50_8: # %entry
+; CHECK-NOV-NEXT: mv a0, a2
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -6243,37 +6189,29 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-V-NEXT: li a2, 1
 ; CHECK-V-NEXT: .LBB50_2: # %entry
 ; CHECK-V-NEXT: slti a3, s0, 1
-; CHECK-V-NEXT: neg a3, a3
-; CHECK-V-NEXT: and a3, a3, s1
-; CHECK-V-NEXT: addi a4, s0, -1
-; CHECK-V-NEXT: seqz a4, a4
-; CHECK-V-NEXT: addi a4, a4, -1
-; CHECK-V-NEXT: slti a5, a1, 1
-; CHECK-V-NEXT: neg a5, a5
-; CHECK-V-NEXT: addi a1, a1, -1
-; CHECK-V-NEXT: seqz a6, a1
+; CHECK-V-NEXT: neg a4, a3
+; CHECK-V-NEXT: slti a1, a1, 1
 ; CHECK-V-NEXT: blez s0, .LBB50_4
 ; CHECK-V-NEXT: # %bb.3: # %entry
 ; CHECK-V-NEXT: li s0, 1
 ; CHECK-V-NEXT: .LBB50_4: # %entry
-; CHECK-V-NEXT: and a1, a5, a0
-; CHECK-V-NEXT: addi a5, a6, -1
-; CHECK-V-NEXT: and a0, a4, a3
+; CHECK-V-NEXT: neg a3, a1
+; CHECK-V-NEXT: and a1, a4, s1
 ; CHECK-V-NEXT: beqz s0, .LBB50_6
 ; CHECK-V-NEXT: # %bb.5: # %entry
-; CHECK-V-NEXT: sgtz a3, s0
-; CHECK-V-NEXT: neg a3, a3
-; CHECK-V-NEXT: and a0, a3, a0
+; CHECK-V-NEXT: sgtz a4, s0
+; CHECK-V-NEXT: neg a4, a4
+; CHECK-V-NEXT: and a1, a4, a1
 ; CHECK-V-NEXT: .LBB50_6: # %entry
-; CHECK-V-NEXT: and a1, a5, a1
+; CHECK-V-NEXT: and a0, a3, a0
 ; CHECK-V-NEXT: beqz a2, .LBB50_8
 ; CHECK-V-NEXT: # %bb.7: # %entry
 ; CHECK-V-NEXT: sgtz a2, a2
 ; CHECK-V-NEXT: neg a2, a2
-; CHECK-V-NEXT: and a1, a2, a1
+; CHECK-V-NEXT: and a0, a2, a0
 ; CHECK-V-NEXT: .LBB50_8: # %entry
-; CHECK-V-NEXT: sd a1, 24(sp)
-; CHECK-V-NEXT: sd a0, 32(sp)
+; CHECK-V-NEXT: sd a0, 24(sp)
+; CHECK-V-NEXT: sd a1, 32(sp)
 ; CHECK-V-NEXT: addi a0, sp, 24
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT: vle64.v v8, (a0)
@@ -6547,20 +6485,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NOV-NEXT: mv a0, s0
 ; CHECK-NOV-NEXT: call __extendhfsf2@plt
 ; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a0, a2, a0
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: snez a1, a1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: and a0, a1, a0
 ; CHECK-NOV-NEXT: snez a1, s2
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: and a1, a1, s1
-; CHECK-NOV-NEXT: addi s2, s2, -1
-; CHECK-NOV-NEXT: seqz a2, s2
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a1, a2, a1
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -6589,20 +6519,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-V-NEXT: mv a0, s0
 ; CHECK-V-NEXT: call __extendhfsf2@plt
 ; CHECK-V-NEXT: call __fixunssfti@plt
-; CHECK-V-NEXT: snez a2, a1
-; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a0, a2, a0
-; CHECK-V-NEXT: addi a1, a1, -1
-; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: snez a1, a1
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: and a0, a1, a0
 ; CHECK-V-NEXT: snez a1, s2
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: and a1, a1, s1
-; CHECK-V-NEXT: addi s2, s2, -1
-; CHECK-V-NEXT: seqz a2, s2
-; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a1, a2, a1
 ; CHECK-V-NEXT: sd a1, 8(sp)
 ; CHECK-V-NEXT: sd a0, 0(sp)
 ; CHECK-V-NEXT: addi a0, sp, 8
@@ -6646,42 +6568,35 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NOV-NEXT: mv a0, s2
 ; CHECK-NOV-NEXT: call __extendhfsf2@plt
 ; CHECK-NOV-NEXT: call __fixsfti@plt
-; CHECK-NOV-NEXT: mv a2, a1
+; CHECK-NOV-NEXT: mv a3, a1
 ; CHECK-NOV-NEXT: blez a1, .LBB53_2
 ; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: li a2, 1
+; CHECK-NOV-NEXT: li a3, 1
 ; CHECK-NOV-NEXT: .LBB53_2: # %entry
 ; CHECK-NOV-NEXT: mv a4, s1
 ; CHECK-NOV-NEXT: blez s1, .LBB53_4
 ; CHECK-NOV-NEXT: # %bb.3: # %entry
 ; CHECK-NOV-NEXT: li a4, 1
 ; CHECK-NOV-NEXT: .LBB53_4: # %entry
-; CHECK-NOV-NEXT: slti a3, a1, 1
-; CHECK-NOV-NEXT: neg a3, a3
-; CHECK-NOV-NEXT: and a3, a3, a0
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: seqz a1, a1
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: slti a0, s1, 1
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a0, a0, s0
-; CHECK-NOV-NEXT: addi s1, s1, -1
-; CHECK-NOV-NEXT: seqz a5, s1
-; CHECK-NOV-NEXT: addi a5, a5, -1
-; CHECK-NOV-NEXT: and a0, a5, a0
+; CHECK-NOV-NEXT: slti a1, a1, 1
+; CHECK-NOV-NEXT: neg a1, a1
+; CHECK-NOV-NEXT: slti a2, s1, 1
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a2, a2, s0
 ; CHECK-NOV-NEXT: beqz a4, .LBB53_6
 ; CHECK-NOV-NEXT: # %bb.5: # %entry
 ; CHECK-NOV-NEXT: sgtz a4, a4
 ; CHECK-NOV-NEXT: neg a4, a4
-; CHECK-NOV-NEXT: and a0, a4, a0
+; CHECK-NOV-NEXT: and a2, a4, a2
 ; CHECK-NOV-NEXT: .LBB53_6: # %entry
-; CHECK-NOV-NEXT: and a1, a1, a3
-; CHECK-NOV-NEXT: beqz a2, .LBB53_8
+; CHECK-NOV-NEXT: and a1, a1, a0
+; CHECK-NOV-NEXT: beqz a3, .LBB53_8
 ; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: sgtz a2, a2
-; CHECK-NOV-NEXT: neg a2, a2
-; CHECK-NOV-NEXT: and a1, a2, a1
+; CHECK-NOV-NEXT: sgtz a0, a3
+; CHECK-NOV-NEXT: neg a0, a0
+; CHECK-NOV-NEXT: and a1, a0, a1
 ; CHECK-NOV-NEXT: .LBB53_8: # %entry
+; CHECK-NOV-NEXT: mv a0, a2
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -6719,34 +6634,26 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-V-NEXT: # %bb.3: # %entry
 ; CHECK-V-NEXT: li a4, 1
 ; CHECK-V-NEXT: .LBB53_4: # %entry
-; CHECK-V-NEXT: slti a3, a1, 1
-; CHECK-V-NEXT: neg a3, a3
-; CHECK-V-NEXT: and a3, a3, a0
-; CHECK-V-NEXT: addi a1, a1, -1
-; CHECK-V-NEXT: seqz a1, a1
-; CHECK-V-NEXT: addi a1, a1, -1
-; CHECK-V-NEXT: slti a0, s1, 1
-; CHECK-V-NEXT: neg a0, a0
-; CHECK-V-NEXT: and a0, a0, s0
-; CHECK-V-NEXT: addi s1, s1, -1
-; CHECK-V-NEXT: seqz a5, s1
-; CHECK-V-NEXT: addi a5, a5, -1
-; CHECK-V-NEXT: and a0, a5, a0
+; CHECK-V-NEXT: slti a1, a1, 1
+; CHECK-V-NEXT: neg a3, a1
+; CHECK-V-NEXT: slti a1, s1, 1
+; CHECK-V-NEXT: neg a1, a1
+; CHECK-V-NEXT: and a1, a1, s0
 ; CHECK-V-NEXT: beqz a4, .LBB53_6
 ; CHECK-V-NEXT: # %bb.5: # %entry
 ; CHECK-V-NEXT: sgtz a4, a4
 ; CHECK-V-NEXT: neg a4, a4
-; CHECK-V-NEXT: and a0, a4, a0
+; CHECK-V-NEXT: and a1, a4, a1
 ; CHECK-V-NEXT: .LBB53_6: # %entry
-; CHECK-V-NEXT: and a1, a1, a3
+; CHECK-V-NEXT: and a0, a3, a0
 ; CHECK-V-NEXT: beqz a2, .LBB53_8
 ; CHECK-V-NEXT: # %bb.7: # %entry
 ; CHECK-V-NEXT: sgtz a2, a2
 ; CHECK-V-NEXT: neg a2, a2
-; CHECK-V-NEXT: and a1, a2, a1
+; CHECK-V-NEXT: and a0, a2, a0
 ; CHECK-V-NEXT: .LBB53_8: # %entry
-; CHECK-V-NEXT: sd a1, 8(sp)
-; CHECK-V-NEXT: sd a0, 0(sp)
+; CHECK-V-NEXT: sd a0, 8(sp)
+; CHECK-V-NEXT: sd a1, 0(sp)
 ; CHECK-V-NEXT: addi a0, sp, 8
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-V-NEXT: vle64.v v9, (a0)
diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index d3297d2..d6376d3 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -1817,100 +1817,94 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT: vmov q4, q0
 ; CHECK-NEXT: vmov r0, r1, d9
 ; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: mvn r11, #-2147483648
+; CHECK-NEXT: cmp r1, r11
 ; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: csel r0, r1, r11, lo
 ; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: csel r1, r3, r0, mi
-; CHECK-NEXT: mov r0, r3
-; CHECK-NEXT: it ne
-; CHECK-NEXT: andne.w r0, r2, r0, asr #31
-; CHECK-NEXT: mvn r11, #-2147483648
-; CHECK-NEXT: ands r0, r1
-; CHECK-NEXT: cmp r6, r11
+; CHECK-NEXT: mov r9, r1
 ; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: add.w r3, r0, #1
-; CHECK-NEXT: csel r0, r6, r11, lo
+; CHECK-NEXT: csel r1, r1, r11, mi
+; CHECK-NEXT: orrs r3, r2
+; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: csel r4, r0, r1, eq
+; CHECK-NEXT: movs r0, #0
 ; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r7, r6, r11, mi
-; CHECK-NEXT: orrs r2, r5
+; CHECK-NEXT: mov.w r1, #-2147483648
+; CHECK-NEXT: csel r8, r5, r0, mi
+; CHECK-NEXT: and.w r0, r2, r5, asr #31
+; CHECK-NEXT: and.w r0, r0, r8
+; CHECK-NEXT: cmp.w r8, #-1
+; CHECK-NEXT: add.w r2, r0, #1
+; CHECK-NEXT: csel r0, r4, r1, gt
+; CHECK-NEXT: cmp.w r4, #-2147483648
 ; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: csel r8, r0, r7, eq
-; CHECK-NEXT: mov.w r2, #-2147483648
-; CHECK-NEXT: cmp.w r1, #-1
-; CHECK-NEXT: csel r0, r8, r2, gt
-; CHECK-NEXT: cmp.w r8, #-2147483648
-; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: csel r1, r8, r2, hi
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov.w r9, #0
+; CHECK-NEXT: csel r1, r4, r1, hi
+; CHECK-NEXT: cmp r2, #0
 ; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT: vmov r0, r1, d8
 ; CHECK-NEXT: bl __fixdfti
 ; CHECK-NEXT: cmp r1, r11
-; CHECK-NEXT: mov lr, r0
+; CHECK-NEXT: mov r12, r0
 ; CHECK-NEXT: csel r7, r1, r11, lo
 ; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov r0, r3
-; CHECK-NEXT: csel r4, r1, r11, mi
-; CHECK-NEXT: orrs r3, r2
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: csel r7, r7, r4, eq
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csel r4, r0, r9, mi
-; CHECK-NEXT: mov.w r3, #-2147483648
-; CHECK-NEXT: cmp.w r4, #-1
-; CHECK-NEXT: csel r9, r7, r3, gt
+; CHECK-NEXT: csel r6, r1, r11, mi
+; CHECK-NEXT: orrs.w r0, r2, r3
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: csel r6, r7, r6, eq
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: csel r7, r3, r0, mi
+; CHECK-NEXT: and.w r2, r2, r3, asr #31
+; CHECK-NEXT: cmp.w r7, #-1
+; CHECK-NEXT: mov.w r0, #-2147483648
+; CHECK-NEXT: csel lr, r6, r0, gt
+; CHECK-NEXT: cmp.w r6, #-2147483648
+; CHECK-NEXT: and.w r2, r2, r7
+; CHECK-NEXT: csel r0, r6, r0, hi
 ; CHECK-NEXT: adds r2, #1
 ; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT: csel r12, r12, r9, eq
-; CHECK-NEXT: cmp r6, r11
-; CHECK-NEXT: csel r6, r10, r3, lo
-; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: csel r6, r10, r6, eq
+; CHECK-NEXT: csel r0, r0, lr, eq
+; CHECK-NEXT: mov.w lr, #-1
+; CHECK-NEXT: cmp r9, r11
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: csel r2, r10, lr, lo
+; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: csel r2, r10, r2, eq
 ; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r5, r10, r3, mi
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csel r5, r6, r5, eq
-; CHECK-NEXT: cmp.w r8, #-2147483648
-; CHECK-NEXT: mov.w r8, #0
-; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: csel r6, r5, r8, hi
-; CHECK-NEXT: csel r6, r5, r6, eq
-; CHECK-NEXT: cmp.w r2, #-1
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: csel r5, r5, r8, gt
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: csel r5, r6, r5, eq
-; CHECK-NEXT: cmp r1, r11
-; CHECK-NEXT: csel r1, lr, r3, lo
-; CHECK-NEXT: csel r1, lr, r1, eq
+; CHECK-NEXT: csel r5, r10, lr, mi
 ; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csel r0, lr, r3, mi
-; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r2, r2, r5, eq
+; CHECK-NEXT: cmp.w r4, #-2147483648
+; CHECK-NEXT: mov.w r5, #0
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: csel r4, r2, r5, hi
+; CHECK-NEXT: csel r4, r2, r4, eq
+; CHECK-NEXT: cmp.w r8, #-1
+; CHECK-NEXT: csel r2, r2, r5, gt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: csel r2, r4, r2, eq
+; CHECK-NEXT: cmp r1, r11
+; CHECK-NEXT: csel r1, r12, lr, lo
+; CHECK-NEXT: csel r1, r12, r1, eq
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: csel r0, r12, lr, mi
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
 ; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: cmp.w r7, #-2147483648
-; CHECK-NEXT: csel r1, r0, r8, hi
-; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT: cmp.w r6, #-2147483648
+; CHECK-NEXT: csel r1, r0, r5, hi
 ; CHECK-NEXT: csel r1, r0, r1, eq
-; CHECK-NEXT: cmp.w r4, #-1
-; CHECK-NEXT: csel r0, r0, r8, gt
-; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: cmp.w r7, #-1
+; CHECK-NEXT: csel r0, r0, r5, gt
+; CHECK-NEXT: cmp r3, #0
 ; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[3], q0[1], r12, r0
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
 ; CHECK-NEXT: add sp, #24
 ; CHECK-NEXT: vpop {d8, d9}
 ; CHECK-NEXT: add sp, #4
@@ -2074,105 +2068,96 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: .pad #28
-; CHECK-NEXT: sub sp, #28
-; CHECK-NEXT: vmov r4, r0, d0
+; CHECK-NEXT: .pad #20
+; CHECK-NEXT: sub sp, #20
+; CHECK-NEXT: vmov r7, r0, d0
 ; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: mvn r9, #-2147483648
+; CHECK-NEXT: cmp r1, r9
 ; CHECK-NEXT: mov r11, r0
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: csel r0, r1, r9, lo
 ; CHECK-NEXT: cmp r3, #0
 ; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: csel r1, r3, r0, mi
-; CHECK-NEXT: mov r0, r3
-; CHECK-NEXT: it ne
-; CHECK-NEXT: andne.w r0, r2, r0, asr #31
-; CHECK-NEXT: mvn r10, #-2147483648
-; CHECK-NEXT: ands r0, r1
-; CHECK-NEXT: cmp r6, r10
 ; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: add.w r3, r0, #1
-; CHECK-NEXT: csel r0, r6, r10, lo
+; CHECK-NEXT: csel r1, r1, r9, mi
+; CHECK-NEXT: orrs r3, r2
+; CHECK-NEXT: mov.w r8, #-2147483648
+; CHECK-NEXT: csel r4, r0, r1, eq
+; CHECK-NEXT: movs r0, #0
 ; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r7, r6, r10, mi
-; CHECK-NEXT: orrs r2, r5
-; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: csel r8, r0, r7, eq
-; CHECK-NEXT: mov.w r2, #-2147483648
-; CHECK-NEXT: cmp.w r1, #-1
-; CHECK-NEXT: csel r0, r8, r2, gt
-; CHECK-NEXT: cmp.w r8, #-2147483648
-; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: csel r1, r8, r2, hi
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov.w r9, #0
-; CHECK-NEXT: csel r0, r1, r0, eq
 ; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: csel r10, r5, r0, mi
+; CHECK-NEXT: and.w r0, r2, r5, asr #31
+; CHECK-NEXT: and.w r0, r0, r10
+; CHECK-NEXT: cmp.w r10, #-1
+; CHECK-NEXT: add.w r2, r0, #1
+; CHECK-NEXT: csel r0, r4, r8, gt
+; CHECK-NEXT: cmp.w r4, #-2147483648
+; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: csel r1, r4, r8, hi
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r0, r1, r0, eq
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r7
 ; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: cmp r1, r10
+; CHECK-NEXT: cmp r1, r9
 ; CHECK-NEXT: mov lr, r0
-; CHECK-NEXT: csel r7, r1, r10, lo
+; CHECK-NEXT: csel r12, r1, r9, lo
 ; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov r0, r3
-; CHECK-NEXT: csel r4, r1, r10, mi
-; CHECK-NEXT: orrs.w r3, r2, r0
-; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: csel r7, r7, r4, eq
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csel r4, r0, r9, mi
-; CHECK-NEXT: mov.w r3, #-2147483648
-; CHECK-NEXT: cmp.w r4, #-1
-; CHECK-NEXT: csel r9, r7, r3, gt
+; CHECK-NEXT: csel r7, r1, r9, mi
+; CHECK-NEXT: orrs.w r0, r2, r3
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: csel r7, r12, r7, eq
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: csel r12, r3, r0, mi
+; CHECK-NEXT: and.w r2, r2, r3, asr #31
+; CHECK-NEXT: cmp.w r12, #-1
+; CHECK-NEXT: and.w r2, r2, r12
+; CHECK-NEXT: csel r0, r7, r8, gt
 ; CHECK-NEXT: cmp.w r7, #-2147483648
-; CHECK-NEXT: csel r12, r7, r3, hi
-; CHECK-NEXT: mov r3, r0
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: andne.w r3, r2, r3, asr #31
-; CHECK-NEXT: and.w r2, r3, r4
-; CHECK-NEXT: mov.w r3, #-1
+; CHECK-NEXT: csel r8, r7, r8, hi
 ; CHECK-NEXT: adds r2, #1
-; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: csel r12, r12, r9, eq
-; CHECK-NEXT: cmp r6, r10
-; CHECK-NEXT: csel r6, r11, r3, lo
-; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: csel r8, r8, r0, eq
+; CHECK-NEXT: mov.w r0, #-1
+; CHECK-NEXT: cmp r6, r9
+; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
+; CHECK-NEXT: csel r6, r11, r0, lo
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT: csel r6, r11, r6, eq
 ; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r5, r11, r3, mi
+; CHECK-NEXT: csel r5, r11, r0, mi
 ; CHECK-NEXT: cmp r2, #0
 ; CHECK-NEXT: csel r5, r6, r5, eq
-; CHECK-NEXT: cmp.w r8, #-2147483648
-; CHECK-NEXT: mov.w r8, #0
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: csel r6, r5, r8, hi
-; CHECK-NEXT: csel r6, r5, r6, eq
-; CHECK-NEXT: cmp.w r2, #-1
-; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: csel r5, r5, r8, gt
-; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: cmp.w r4, #-2147483648
+; CHECK-NEXT: mov.w r6, #0
 ; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: csel r5, r6, r5, eq
-; CHECK-NEXT: cmp r1, r10
-; CHECK-NEXT: csel r1, lr, r3, lo
+; CHECK-NEXT: csel r4, r5, r6, hi
+; CHECK-NEXT: csel r4, r5, r4, eq
+; CHECK-NEXT: cmp.w r10, #-1
+; CHECK-NEXT: csel r5, r5, r6, gt
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: csel r4, r4, r5, eq
+; CHECK-NEXT: cmp r1, r9
+; CHECK-NEXT: csel r1, lr, r0, lo
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT: csel r1, lr, r1, eq
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csel r0, lr, r3, mi
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: csel r0, lr, r0, mi
 ; CHECK-NEXT: cmp r2, #0
 ; CHECK-NEXT: csel r0, r1, r0, eq
 ; CHECK-NEXT: cmp.w r7, #-2147483648
-; CHECK-NEXT: csel r1, r0, r8, hi
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: csel r1, r0, r6, hi
+; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
 ; CHECK-NEXT: csel r1, r0, r1, eq
-; CHECK-NEXT: cmp.w r4, #-1
-; CHECK-NEXT: csel r0, r0, r8, gt
+; CHECK-NEXT: cmp.w r12, #-1
+; CHECK-NEXT: csel r0, r0, r6, gt
 ; CHECK-NEXT: cmp r2, #0
 ; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
-; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[3], q0[1], r12, r0
-; CHECK-NEXT: add sp, #28
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
+; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: vmov q0[3], q0[1], r8, r0
+; CHECK-NEXT: add sp, #20
 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 entry:
   %conv = fptosi <2 x float> %x to <2 x i128>
@@ -2335,100 +2320,91 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT: vmov.u16 r0, q0[1]
 ; CHECK-NEXT: vmov q4, q0
 ; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: mvn r11, #-2147483648
+; CHECK-NEXT: cmp r1, r11
 ; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: csel r0, r1, r11, lo
 ; CHECK-NEXT: cmp r3, #0
 ; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: csel r1, r3, r0, mi
-; CHECK-NEXT: mov r0, r3
-; CHECK-NEXT: it ne
-; CHECK-NEXT: andne.w r0, r2, r0, asr #31
-; CHECK-NEXT: mvn r11, #-2147483648
-; CHECK-NEXT: ands r0, r1
-; CHECK-NEXT: cmp r6, r11
 ; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: add.w r3, r0, #1
-; CHECK-NEXT: csel r0, r6, r11, lo
+; CHECK-NEXT: csel r1, r1, r11, mi
+; CHECK-NEXT: orrs r3, r2
+; CHECK-NEXT: mov.w r9, #-2147483648
+; CHECK-NEXT: csel r4, r0, r1, eq
+; CHECK-NEXT: movs r0, #0
 ; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r7, r6, r11, mi
-; CHECK-NEXT: orrs r2, r5
+; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: csel r8, r5, r0, mi
+; CHECK-NEXT: and.w r0, r2, r5, asr #31
+; CHECK-NEXT: and.w r0, r0, r8
+; CHECK-NEXT: cmp.w r8, #-1
+; CHECK-NEXT: add.w r2, r0, #1
+; CHECK-NEXT: csel r0, r4, r9, gt
+; CHECK-NEXT: cmp.w r4, #-2147483648
 ; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: csel r8, r0, r7, eq
-; CHECK-NEXT: mov.w r2, #-2147483648
-; CHECK-NEXT: cmp.w r1, #-1
-; CHECK-NEXT: csel r0, r8, r2, gt
-; CHECK-NEXT: cmp.w r8, #-2147483648
-; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: csel r1, r8, r2, hi
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov.w r9, #0
+; CHECK-NEXT: csel r1, r4, r9, hi
+; CHECK-NEXT: cmp r2, #0
 ; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
 ; CHECK-NEXT: vmov.u16 r0, q4[0]
 ; CHECK-NEXT: bl __fixhfti
 ; CHECK-NEXT: cmp r1, r11
 ; CHECK-NEXT: mov lr, r0
-; CHECK-NEXT: csel r7, r1, r11, lo
+; CHECK-NEXT: csel r12, r1, r11, lo
 ; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: mov r0, r3
-; CHECK-NEXT: csel r4, r1, r11, mi
-; CHECK-NEXT: orrs r3, r2
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: csel r7, r7, r4, eq
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csel r4, r0, r9, mi
-; CHECK-NEXT: mov.w r3, #-2147483648
-; CHECK-NEXT: cmp.w r4, #-1
-; CHECK-NEXT: csel r9, r7, r3, gt
+; CHECK-NEXT: csel r7, r1, r11, mi
+; CHECK-NEXT: orrs.w r0, r2, r3
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: csel r7, r12, r7, eq
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: csel r12, r3, r0, mi
+; CHECK-NEXT: and.w r2, r2, r3, asr #31
+; CHECK-NEXT: cmp.w r12, #-1
+; CHECK-NEXT: and.w r2, r2, r12
+; CHECK-NEXT: csel r0, r7, r9, gt
 ; CHECK-NEXT: cmp.w r7, #-2147483648
-; CHECK-NEXT: csel r12, r7, r3, hi
-; CHECK-NEXT: mov r3, r0
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: andne.w r3, r2, r3, asr #31
-; CHECK-NEXT: and.w r2, r3, r4
-; CHECK-NEXT: mov.w r3, #-1
+; CHECK-NEXT: csel r9, r7, r9, hi
 ; CHECK-NEXT: adds r2, #1
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT: csel r12, r12, r9, eq
+; CHECK-NEXT: csel r9, r9, r0, eq
+; CHECK-NEXT: mov.w r0, #-1
 ; CHECK-NEXT: cmp r6, r11
-; CHECK-NEXT: csel r6, r10, r3, lo
-; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: csel r6, r10, r0, lo
+; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT: csel r6, r10, r6, eq
 ; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: csel r5, r10, r3, mi
+; CHECK-NEXT: csel r5, r10, r0, mi
 ; CHECK-NEXT: cmp r2, #0
 ; CHECK-NEXT: csel r5, r6, r5, eq
-; CHECK-NEXT: cmp.w r8, #-2147483648
-; CHECK-NEXT: mov.w r8, #0
-; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: csel r6, r5, r8, hi
-; CHECK-NEXT: csel r6, r5, r6, eq
-; CHECK-NEXT: cmp.w r2, #-1
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: csel r5, r5, r8, gt
+; CHECK-NEXT: cmp.w r4, #-2147483648
+; CHECK-NEXT: mov.w r6, #0
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: csel r4, r5, r6, hi
+; CHECK-NEXT: csel r4, r5, r4, eq
+; CHECK-NEXT: cmp.w r8, #-1
+; CHECK-NEXT: csel r5, r5, r6, gt
 ; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: csel r5, r6, r5, eq
+; CHECK-NEXT: csel r4, r4, r5, eq
 ; CHECK-NEXT: cmp r1, r11
-; CHECK-NEXT: csel r1, lr, r3, lo
+; CHECK-NEXT: csel r1, lr, r0, lo
+; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT: csel r1, lr, r1, eq
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csel r0, lr, r3, mi
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: csel r0, lr, r0, mi
 ; CHECK-NEXT: cmp r2, #0
 ; CHECK-NEXT: csel r0, r1, r0, eq
 ; CHECK-NEXT: cmp.w r7, #-2147483648
-; CHECK-NEXT: csel r1, r0, r8, hi
-; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT: csel r1, r0, r6, hi
+; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT: csel r1, r0, r1, eq
-; CHECK-NEXT: cmp.w r4, #-1
-; CHECK-NEXT: csel r0, r0, r8, gt
+; CHECK-NEXT: cmp.w r12, #-1
+; CHECK-NEXT: csel r0, r0, r6, gt
 ; CHECK-NEXT: cmp r2, #0
 ; CHECK-NEXT: csel r0, r1, r0, eq
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: vmov q0[3], q0[1], r12, r0
+; CHECK-NEXT: vmov q0[3], q0[1], r9, r0
 ; CHECK-NEXT: add sp, #24
 ; CHECK-NEXT: vpop {d8, d9}
 ; CHECK-NEXT: add sp, #4
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
index 9a39c62..e539f00 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -574,16 +574,11 @@ define i64 @utest_f64i64_cse_combine(double %x) #0 {
 ; CHECK-NEXT: i32.const 16
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptoui double %x to i128
@@ -677,7 +672,6 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 {
 ; CHECK-NEXT: i32.const 16
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
@@ -685,10 +679,6 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 1
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 3
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
@@ -867,7 +857,6 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 {
 ; CHECK-NEXT: i32.const 16
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
@@ -875,10 +864,6 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 1
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 3
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
@@ -1509,16 +1494,11 @@ define i64 @utest_f64i64_mm(double %x) {
 ; CHECK-NEXT: i32.const 16
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptoui double %x to i128
@@ -1552,7 +1532,6 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; CHECK-NEXT: i32.const 16
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
@@ -1560,10 +1539,6 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 1
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 3
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
@@ -1627,16 +1602,11 @@ define i64 @utest_f32i64_mm(float %x) {
 ; CHECK-NEXT: i32.const 16
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptoui float %x to i128
@@ -1670,7 +1640,6 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; CHECK-NEXT: i32.const 16
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
@@ -1678,10 +1647,6 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 1
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 3
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
@@ -1749,16 +1714,11 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; CHECK-NEXT: i32.const 16
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptoui half %x to i128
@@ -1794,7 +1754,6 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; CHECK-NEXT: i32.const 16
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
@@ -1802,10 +1761,6 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 1
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 3
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 007802d..facd15f 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -2309,27 +2309,17 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT: i32.const 32
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 5
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 4
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: i64x2.replace_lane 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
@@ -2381,7 +2371,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT: i32.const 32
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 5
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 4
@@ -2389,10 +2378,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 1
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 5
 ; CHECK-NEXT: local.get 5
 ; CHECK-NEXT: i64.const 0
@@ -2408,7 +2393,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
 ; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
@@ -2416,10 +2400,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 1
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 4
 ; CHECK-NEXT: local.get 4
 ; CHECK-NEXT: i64.const 0
@@ -2611,27 +2591,17 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT: i32.const 32
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 5
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 4
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: i64x2.replace_lane 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
@@ -2683,7 +2653,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT: i32.const 32
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 5
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 4
@@ -2691,10 +2660,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 1
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 5
 ; CHECK-NEXT: local.get 5
 ; CHECK-NEXT: i64.const 0
@@ -2710,7 +2675,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
 ; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 2
@@ -2718,10 +2682,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 1
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 4
 ; CHECK-NEXT: local.get 4
 ; CHECK-NEXT: i64.const 0
@@ -2917,27 +2877,17 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT: i32.const 32
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 6
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 5
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 4
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: i64x2.replace_lane 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
@@ -2991,7 +2941,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT: i32.const 32
 ; CHECK-NEXT: i32.add
 ; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 6
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 5
@@ -2999,10 +2948,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 2
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 6
 ; CHECK-NEXT: local.get 6
 ; CHECK-NEXT: i64.const 0
@@ -3018,7 +2963,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT: i64.eqz
 ; CHECK-NEXT: i64.select
 ; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 4
 ; CHECK-NEXT: i64.const 0
 ; CHECK-NEXT: local.get 3
@@ -3026,10 +2970,6 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT: i64.lt_s
 ; CHECK-NEXT: local.tee 2
 ; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
 ; CHECK-NEXT: local.tee 5
 ; CHECK-NEXT: local.get 5
 ; CHECK-NEXT: i64.const 0
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index ac6d9d3..87e9a58 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -1081,8 +1081,6 @@ define i64 @utest_f64i64_mm(double %x) {
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
 ; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
 ; CHECK-NEXT: popq %rcx
 ; CHECK-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-NEXT: retq
@@ -1101,11 +1099,9 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; CHECK-NEXT: callq __fixdfti@PLT
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovgq %rcx, %rax
 ; CHECK-NEXT: movl $1, %esi
 ; CHECK-NEXT: cmovleq %rdx, %rsi
-; CHECK-NEXT: cmovgq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
 ; CHECK-NEXT: testq %rsi, %rsi
 ; CHECK-NEXT: cmovsq %rcx, %rax
 ; CHECK-NEXT: popq %rcx
@@ -1147,8 +1143,6 @@ define i64 @utest_f32i64_mm(float %x) {
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
 ; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
 ; CHECK-NEXT: popq %rcx
 ; CHECK-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-NEXT: retq
@@ -1167,11 +1161,9 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; CHECK-NEXT: callq __fixsfti@PLT
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovgq %rcx, %rax
 ; CHECK-NEXT: movl $1, %esi
 ; CHECK-NEXT: cmovleq %rdx, %rsi
-; CHECK-NEXT: cmovgq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
 ; CHECK-NEXT: testq %rsi, %rsi
 ; CHECK-NEXT: cmovsq %rcx, %rax
 ; CHECK-NEXT: popq %rcx
@@ -1221,8 +1213,6 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
 ; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
 ; CHECK-NEXT: popq %rcx
 ; CHECK-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-NEXT: retq
@@ -1241,11 +1231,9 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; CHECK-NEXT: callq __fixhfti@PLT
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovgq %rcx, %rax
 ; CHECK-NEXT: movl $1, %esi
 ; CHECK-NEXT: cmovleq %rdx, %rsi
-; CHECK-NEXT: cmovgq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
 ; CHECK-NEXT: testq %rsi, %rsi
 ; CHECK-NEXT: cmovsq %rcx, %rax
 ; CHECK-NEXT: popq %rcx
diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
index c351c1b..22fbc1c 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
@@ -2762,12 +2762,8 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
 ; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
 ; CHECK-NEXT: testq %r14, %r14
 ; CHECK-NEXT: cmovneq %rcx, %rbx
-; CHECK-NEXT: cmpq $1, %r14
-; CHECK-NEXT: cmoveq %rcx, %rbx
 ; CHECK-NEXT: movq %rbx, %xmm0
 ; CHECK-NEXT: movq %rax, %xmm1
 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -2805,20 +2801,15 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT: callq __fixdfti@PLT
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: movl $1, %esi
-; CHECK-NEXT: movl $1, %edi
-; CHECK-NEXT: cmovleq %rdx, %rdi
 ; CHECK-NEXT: cmovgq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovgq %rsi, %rdx
 ; CHECK-NEXT: testq %r14, %r14
-; CHECK-NEXT: cmovleq %r14, %rsi
 ; CHECK-NEXT: cmovgq %rcx, %rbx
-; CHECK-NEXT: cmpq $1, %r14
-; CHECK-NEXT: cmoveq %rcx, %rbx
+; CHECK-NEXT: cmovleq %r14, %rsi
 ; CHECK-NEXT: testq %rsi, %rsi
 ; CHECK-NEXT: cmovsq %rcx, %rbx
-; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: testq %rdx, %rdx
 ; CHECK-NEXT: cmovsq %rcx, %rax
 ; CHECK-NEXT: movq %rax, %xmm0
 ; CHECK-NEXT: movq %rbx, %xmm1
@@ -2926,12 +2917,8 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
 ; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
 ; CHECK-NEXT: testq %r14, %r14
 ; CHECK-NEXT: cmovneq %rcx, %rbx
-; CHECK-NEXT: cmpq $1, %r14
-; CHECK-NEXT: cmoveq %rcx, %rbx
 ; CHECK-NEXT: movq %rbx, %xmm0
 ; CHECK-NEXT: movq %rax, %xmm1
 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -2969,20 +2956,15 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT: callq __fixsfti@PLT
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: movl $1, %esi
-; CHECK-NEXT: movl $1, %edi
-; CHECK-NEXT: cmovleq %rdx, %rdi
 ; CHECK-NEXT: cmovgq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovgq %rsi, %rdx
 ; CHECK-NEXT: testq %r14, %r14
-; CHECK-NEXT: cmovleq %r14, %rsi
 ; CHECK-NEXT: cmovgq %rcx, %rbx
-; CHECK-NEXT: cmpq $1, %r14
-; CHECK-NEXT: cmoveq %rcx, %rbx
+; CHECK-NEXT: cmovleq %r14, %rsi
 ; CHECK-NEXT: testq %rsi, %rsi
 ; CHECK-NEXT: cmovsq %rcx, %rbx
-; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: testq %rdx, %rdx
 ; CHECK-NEXT: cmovsq %rcx, %rax
 ; CHECK-NEXT: movq %rax, %xmm0
 ; CHECK-NEXT: movq %rbx, %xmm1
@@ -3091,12 +3073,8 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
 ; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
 ; CHECK-NEXT: testq %r14, %r14
 ; CHECK-NEXT: cmovneq %rcx, %rbx
-; CHECK-NEXT: cmpq $1, %r14
-; CHECK-NEXT: cmoveq %rcx, %rbx
 ; CHECK-NEXT: movq %rbx, %xmm0
 ; CHECK-NEXT: movq %rax, %xmm1
 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -3134,20 +3112,15 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT: callq __fixhfti@PLT
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: movl $1, %esi
-; CHECK-NEXT: movl $1, %edi
-; CHECK-NEXT: cmovleq %rdx, %rdi
 ; CHECK-NEXT: cmovgq %rcx, %rax
-; CHECK-NEXT: cmpq $1, %rdx
-; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovgq %rsi, %rdx
 ; CHECK-NEXT: testq %r14, %r14
-; CHECK-NEXT: cmovleq %r14, %rsi
 ; CHECK-NEXT: cmovgq %rcx, %rbx
-; CHECK-NEXT: cmpq $1, %r14
-; CHECK-NEXT: cmoveq %rcx, %rbx
+; CHECK-NEXT: cmovleq %r14, %rsi
 ; CHECK-NEXT: testq %rsi, %rsi
 ; CHECK-NEXT: cmovsq %rcx, %rbx
-; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: testq %rdx, %rdx
 ; CHECK-NEXT: cmovsq %rcx, %rax
 ; CHECK-NEXT: movq %rax, %xmm0
 ; CHECK-NEXT: movq %rbx, %xmm1
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index b738765..f0ce231 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -370,123 +370,117 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $88, %esp
+; X86-NEXT: subl $64, %esp
 ; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl 12(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %edi
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $31, %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $31, %ecx, %eax
-; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl 12(%ebp), %ebx
+; X86-NEXT: movl 20(%ebp), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %eax, %edi
 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $31, %ebx, %edi
+; X86-NEXT: shldl $31, %ecx, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: shll $31, %ecx
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %edx
 ; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %esi
 ; X86-NEXT: pushl %ecx
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: pushl %eax
-; X86-NEXT: calll __divti3
+; X86-NEXT: calll __modti3
 ; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %esi
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ebx
-; X86-NEXT: testl %edi, %edi
+; X86-NEXT: testl %esi, %esi
 ; X86-NEXT: sets %al
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: sets %dl
-; X86-NEXT: xorb %al, %dl
-; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: sets %cl
+; X86-NEXT: xorb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %ecx, %eax
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
+; X86-NEXT: setne %bh
+; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %esi
 ; X86-NEXT: pushl 20(%ebp)
 ; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X86-NEXT: pushl %eax
-; X86-NEXT: calll __modti3
+; X86-NEXT: calll __divti3
 ; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: movl $0, %eax -; X86-NEXT: cmovsl %ebx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: cmovsl %edx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovsl %esi, %eax -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: andl %ecx, %edi -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: cmovel %ebx, %edi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: subl $1, %eax +; X86-NEXT: movl %esi, %edx +; X86-NEXT: sbbl $0, %edx +; X86-NEXT: setb %bl +; X86-NEXT: testb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: cmovel %esi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addb $255, %bl +; X86-NEXT: sbbl $0, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload +; X86-NEXT: testb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmovel %esi, %ecx ; X86-NEXT: cmpl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-NEXT: cmovbl %ecx, %edx +; X86-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF +; X86-NEXT: cmovbl %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testl %edx, %edx +; X86-NEXT: movl $-1, %esi +; X86-NEXT: cmovnsl %eax, %esi ; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmovsl %ecx, %esi -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: cmovnel %eax, %esi -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl $2147483647, %ebx # imm = 0x7FFFFFFF +; X86-NEXT: cmovnsl %ebx, %edx +; X86-NEXT: movl $-1, %ebx +; X86-NEXT: cmovnsl %ebx, %eax +; X86-NEXT: movl $0, %ebx +; X86-NEXT: cmovsl %ecx, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: cmpl $-2147483647, %edx # imm = 0x80000001 -; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-NEXT: cmovael %edx, %eax -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000 +; X86-NEXT: cmovael %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: andl %eax, %esi +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: movl $-2147483648, %edi # imm = 0x80000000 +; X86-NEXT: cmovsl %edi, 
%edx +; X86-NEXT: movl $0, %edi +; X86-NEXT: cmovsl %edi, %eax ; X86-NEXT: sarl $31, %ecx -; X86-NEXT: andl %esi, %ecx -; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000 -; X86-NEXT: cmovsl %ebx, %edx -; X86-NEXT: movl $0, %ebx -; X86-NEXT: cmovsl %ebx, %esi -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %edi -; X86-NEXT: cmovel %ecx, %esi -; X86-NEXT: cmovel %eax, %edx -; X86-NEXT: movl %esi, %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: andl %ebx, %ecx +; X86-NEXT: cmpl $-1, %ecx +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -842,100 +836,78 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $256, %esp # imm = 0x100 -; X86-NEXT: movl 16(%ebp), %edi -; X86-NEXT: movl 32(%ebp), %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: subl $192, %esp +; X86-NEXT: movl 24(%ebp), %ebx +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %eax, %edx +; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: sarl $31, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: leal (%edi,%edi), %eax -; X86-NEXT: shrl $31, %edi -; X86-NEXT: shldl $31, %eax, %edi +; X86-NEXT: leal (%ecx,%ecx), %eax +; X86-NEXT: shrl $31, %ecx +; X86-NEXT: shldl $31, %eax, %ecx +; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax -; X86-NEXT: calll __divti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edx ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl 32(%ebp) -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi +; X86-NEXT: pushl %ecx ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: movl 36(%ebp), %edx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl 40(%ebp), %ecx ; X86-NEXT: movl %ecx, %esi ; X86-NEXT: sarl $31, %esi -; X86-NEXT: leal (%ecx,%ecx), %eax -; X86-NEXT: shrl $31, %ecx -; X86-NEXT: shldl $31, %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edx ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: leal (%ebx,%ebx), %eax +; X86-NEXT: shrl $31, %ebx +; X86-NEXT: shldl $31, %eax, %ebx +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi +; 
X86-NEXT: pushl %ebx ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: movl 28(%ebp), %edx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: sarl $31, %esi -; X86-NEXT: leal (%ecx,%ecx), %eax -; X86-NEXT: shrl $31, %ecx -; X86-NEXT: shldl $31, %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %esi +; X86-NEXT: pushl 40(%ebp) ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edx -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: movl 40(%ebp), %edx +; X86-NEXT: movl 36(%ebp), %edx ; X86-NEXT: movl %edx, %esi ; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl 24(%ebp), %ecx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 20(%ebp), %ecx ; X86-NEXT: movl %ecx, %edi ; X86-NEXT: sarl $31, %edi ; X86-NEXT: leal (%ecx,%ecx), %eax ; X86-NEXT: shrl $31, %ecx ; X86-NEXT: shldl $31, %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %ebx ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi @@ -949,27 +921,51 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp +; X86-NEXT: movl 32(%ebp), %edx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl 16(%ebp), %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: leal (%ecx,%ecx), %eax +; X86-NEXT: shrl $31, %ecx +; X86-NEXT: shldl $31, %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl 36(%ebp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl $0 +; X86-NEXT: pushl %eax +; X86-NEXT: calll __modti3 +; X86-NEXT: addl $32, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax @@ -977,327 +973,282 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: addl $32, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: subl $1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: sbbl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %edx -; X86-NEXT: sbbl $0, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: sets %al ; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: sets %bl -; X86-NEXT: testl %edi, %edi -; X86-NEXT: sets %bh -; X86-NEXT: xorb %bl, %bh -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: sets %dl +; X86-NEXT: xorb %al, %dl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl %eax, %ebx ; X86-NEXT: setne %al -; X86-NEXT: testb %bh, %al -; X86-NEXT: cmovel %esi, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: testb %dl, %al +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: andl %edi, %eax +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: movl $0, %ebx +; X86-NEXT: cmovsl %ecx, %ebx +; X86-NEXT: movl $-1, %esi +; X86-NEXT: cmovsl (%esp), %esi # 4-byte Folded Reload +; X86-NEXT: negl %edi +; X86-NEXT: movl $0, %edi +; X86-NEXT: sbbl %edi, %edi +; X86-NEXT: orl (%esp), %edi # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl $0, %esi +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: cmpl $-1, %eax +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovel %edi, 
%ecx +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: cmovsl %esi, %edi +; X86-NEXT: movl $-1, %esi +; X86-NEXT: cmovsl %esi, %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: andl %ebx, %edx +; X86-NEXT: cmpl $-1, %edx +; X86-NEXT: cmovnel %eax, %edx +; X86-NEXT: cmovel %ecx, %edi +; X86-NEXT: shrdl $1, %edx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: pushl $0 +; X86-NEXT: pushl %eax +; X86-NEXT: calll __divti3 +; X86-NEXT: addl $32, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: subl $1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %ecx +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %eax -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %eax ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: sets %bl -; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: sets %bh -; X86-NEXT: xorb %bl, %bh ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %edi +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: sets %al +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: sets %dl +; X86-NEXT: xorb %al, %dl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl %eax, %ebx ; X86-NEXT: setne %al -; X86-NEXT: testb %bh, %al -; X86-NEXT: cmovel %esi, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel %edx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: testb %dl, %al +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: cmovel %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: 
cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl %edi, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: andl %ecx, %ebx +; X86-NEXT: testl %edi, %edi +; X86-NEXT: movl $0, %eax +; X86-NEXT: cmovsl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $-1, %eax +; X86-NEXT: cmovsl (%esp), %eax # 4-byte Folded Reload +; X86-NEXT: negl %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: orl (%esp), %ecx # 4-byte Folded Reload +; X86-NEXT: orl %edx, %edi +; X86-NEXT: cmovnel %eax, %ecx +; X86-NEXT: movl $0, %edi +; X86-NEXT: cmovel %edi, %ebx +; X86-NEXT: cmpl $-1, %ebx +; X86-NEXT: movl $0, %eax +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: testl %edx, %edx +; X86-NEXT: cmovsl %edi, %ecx +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl $-1, %ecx +; X86-NEXT: cmovsl %ecx, %ebx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: andl %edx, %esi +; X86-NEXT: cmpl $-1, %esi +; X86-NEXT: cmovnel %ebx, %esi +; X86-NEXT: cmovel %eax, %edi +; X86-NEXT: shrdl $1, %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: subl $1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %esi -; X86-NEXT: sbbl $0, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: testl %edx, %edx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %ebx +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: sets %bl -; X86-NEXT: xorb %al, %bl -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx -; X86-NEXT: pushl 28(%ebp) -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax -; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: sets %dl +; X86-NEXT: xorb %al, %dl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: orl {{[0-9]+}}(%esp), %esi +; X86-NEXT: orl %eax, %esi ; X86-NEXT: setne %al -; X86-NEXT: testb %bl, %al -; X86-NEXT: cmovel %edi, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: testb %dl, %al +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: movl $0, %edi +; X86-NEXT: cmovsl %ebx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $-1, %edi +; X86-NEXT: cmovsl %edx, %edi +; X86-NEXT: negl %ecx +; X86-NEXT: movl $0, %edx +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: orl (%esp), %edx # 4-byte Folded Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: cmovnel %edi, %edx +; X86-NEXT: movl $0, %edi +; X86-NEXT: cmovel %edi, %eax +; X86-NEXT: cmpl $-1, %eax +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovel %edx, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: cmovsl %edi, %edx +; X86-NEXT: movl $-1, %edi +; X86-NEXT: cmovsl %edi, %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: andl %ebx, %esi +; X86-NEXT: cmpl $-1, %esi +; X86-NEXT: cmovnel %eax, %esi +; X86-NEXT: cmovel %ecx, %edx +; X86-NEXT: shrdl $1, %esi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: subl $1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: subl $1, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %edx +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %ecx ; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: sets %bl +; X86-NEXT: sets %al ; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: sets %bh -; X86-NEXT: xorb %bl, %bh -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: sets %bl +; X86-NEXT: xorb %al, %bl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: 
orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl %eax, %edi ; X86-NEXT: setne %al -; X86-NEXT: testb %bh, %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel %esi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testb %bl, %al +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testl %edx, %edx -; X86-NEXT: movl $0, %eax -; X86-NEXT: cmovsl %edx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovsl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: sarl $31, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovel %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovsl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovel %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovsl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl %ecx, %ebx ; X86-NEXT: sarl $31, %ebx ; X86-NEXT: movl %ebx, %eax -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: andl %esi, %eax ; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, %eax -; X86-NEXT: cmovsl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: movl $-1, %esi -; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: sarl $31, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: 
andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovel %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: andl %eax, %ebx -; X86-NEXT: negl %eax -; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %ecx, %ecx -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmovnel %esi, %ecx -; X86-NEXT: movl $0, %edx -; X86-NEXT: cmovel %edx, %ebx -; X86-NEXT: cmpl $-1, %ebx +; X86-NEXT: movl $0, %edi +; X86-NEXT: cmovsl %ecx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $-1, %edi +; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: negl %esi ; X86-NEXT: movl $0, %esi -; X86-NEXT: cmovel %ecx, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovsl %edx, %ecx -; X86-NEXT: movl $-1, %edx -; X86-NEXT: cmovsl %edx, %ebx -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: orl (%esp), %ecx # 4-byte Folded Reload +; X86-NEXT: cmovnel %edi, %esi +; X86-NEXT: movl $0, %edi +; X86-NEXT: cmovel %edi, %eax ; X86-NEXT: cmpl $-1, %eax -; X86-NEXT: cmovel %esi, %ecx -; X86-NEXT: cmovnel %ebx, %eax -; X86-NEXT: shldl $31, %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: andl %eax, %edi -; X86-NEXT: negl %eax -; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl $0, %esi -; X86-NEXT: cmovel %esi, %edi -; X86-NEXT: cmpl $-1, %edi -; X86-NEXT: movl $0, %edx -; X86-NEXT: cmovel %eax, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: cmovsl %esi, %eax -; X86-NEXT: movl $-1, %ebx -; X86-NEXT: cmovsl %ebx, %edi -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %ecx -; X86-NEXT: cmovel %edx, %eax -; X86-NEXT: cmovnel %edi, %ecx -; X86-NEXT: shldl $31, %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: andl %eax, %edx -; X86-NEXT: negl %eax -; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: cmovel %esi, %edx -; X86-NEXT: cmpl $-1, %edx ; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovel %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: cmovsl %esi, %eax +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: cmovsl %edi, %esi ; 
X86-NEXT: movl $-1, %edi -; X86-NEXT: cmovsl %edi, %edx +; X86-NEXT: cmovsl %edi, %eax +; X86-NEXT: andl (%esp), %ebx # 4-byte Folded Reload ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: cmpl $-1, %ebx -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: cmovnel %edx, %ebx -; X86-NEXT: shldl $31, %eax, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: andl %eax, %edi -; X86-NEXT: negl %eax -; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: cmovel %esi, %edi -; X86-NEXT: cmpl $-1, %edi -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovel %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: testl %edx, %edx -; X86-NEXT: cmovsl %esi, %eax -; X86-NEXT: movl $-1, %esi -; X86-NEXT: cmovsl %esi, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: andl %edx, %esi -; X86-NEXT: cmpl $-1, %esi -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: cmovnel %edi, %esi -; X86-NEXT: shldl $31, %eax, %esi +; X86-NEXT: cmovnel %eax, %ebx +; X86-NEXT: cmovel %ecx, %esi +; X86-NEXT: shrdl $1, %ebx, %esi ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl %esi, 12(%eax) -; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %edx, 8(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl %ecx, 4(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -- 2.7.4