From: Samuel Parker
Date: Tue, 7 Feb 2023 10:30:39 +0000 (+0000)
Subject: Revert "[DAGCombine] Fold redundant select"
X-Git-Tag: upstream/17.0.6~18360
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=91f8289ff0408e071e2ea32cf6b270778b1acdce;p=platform%2Fupstream%2Fllvm.git

Revert "[DAGCombine] Fold redundant select"

This reverts commit bbdf24357932b064f2aa18ea1356b474e0220dde.
---

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dbbbcd1..d7040bf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10956,73 +10956,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
     }
   }
 
-  // If we have a chain of two selects, which share a true/false value and
-  // both are controlled from the two setcc nodes which cannot produce the
-  // same value, we can fold away N.
-  // select (setcc X), Y, (select (setcc X), Z, Y) -> select (setcc X), Z, Y
-  auto IsSelect = [](SDValue Op) {
-    return Op->getOpcode() == ISD::SELECT;
-  };
-  if ((IsSelect(N1) || IsSelect(N2)) && (N1.getOpcode() != N2.getOpcode())) {
-    auto AreSame = [](SDValue Op0, SDValue Op1) {
-      if (Op0 == Op1)
-        return true;
-      auto *C0 = dyn_cast<ConstantSDNode>(Op0);
-      auto *C1 = dyn_cast<ConstantSDNode>(Op1);
-      return C0 && C1 &&
-             APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
-    };
-
-    SDValue OtherSelect;
-    bool SelectsShareOp = false;
-    if (IsSelect(N1)) {
-      OtherSelect = N1;
-      SelectsShareOp = AreSame(OtherSelect.getOperand(1), N2);
-    } else {
-      OtherSelect = N2;
-      SelectsShareOp = AreSame(OtherSelect.getOperand(2), N1);
-    }
-
-    auto CanNeverBeEqual = [](SDValue SetCC0, SDValue SetCC1) {
-      if (SetCC0->getOpcode() != ISD::SETCC ||
-          SetCC1->getOpcode() != ISD::SETCC ||
-          SetCC0->getOperand(0) != SetCC1->getOperand(0))
-        return false;
-
-      ISD::CondCode CC0 = cast<CondCodeSDNode>(SetCC0.getOperand(2))->get();
-      ISD::CondCode CC1 = cast<CondCodeSDNode>(SetCC1.getOperand(2))->get();
-      auto *C0 = dyn_cast<ConstantSDNode>(SetCC0.getOperand(1));
-      auto *C1 = dyn_cast<ConstantSDNode>(SetCC1.getOperand(1));
-      if (!C0 || !C1)
-        return false;
-
-      bool ConstantsAreSame =
-          APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
-      auto IsEqual = [](ISD::CondCode CC) {
-        return CC == ISD::SETEQ;
-      };
-      auto IsNotEqual = [](ISD::CondCode CC) {
-        return CC == ISD::SETLT || CC == ISD::SETULT ||
-               CC == ISD::SETGT || CC == ISD::SETUGT ||
-               CC == ISD::SETNE;
-      };
-
-      if (ConstantsAreSame && IsNotEqual(CC0) && IsEqual(CC1))
-        return true;
-      if (ConstantsAreSame && IsNotEqual(CC1) && IsEqual(CC0))
-        return true;
-      if (!ConstantsAreSame && IsEqual(CC0) && IsEqual(CC1))
-        return true;
-
-      return false;
-    };
-
-    SDValue SetCC0 = N0;
-    SDValue SetCC1 = OtherSelect.getOperand(0);
-    if (SelectsShareOp && CanNeverBeEqual(SetCC0, SetCC1))
-      return OtherSelect;
-  }
-
   if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
       (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {

diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 1f5e305..91c1a21 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -3099,117 +3099,130 @@ define i64 @stest_f64i64_mm(double %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #20 -; SOFT-NEXT: sub sp, #20 +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: ldr r0, .LCPI45_0 -; 
SOFT-NEXT: cmp r1, r0 -; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: ldr r5, .LCPI45_0 +; SOFT-NEXT: cmp r1, r5 ; SOFT-NEXT: blo .LBB45_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: ldr r5, .LCPI45_0 +; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: .LBB45_2: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bmi .LBB45_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: ldr r1, .LCPI45_0 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: .LBB45_4: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: orrs r0, r3 -; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r7, r2 +; SOFT-NEXT: orrs r7, r3 ; SOFT-NEXT: beq .LBB45_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: mov r1, r0 ; SOFT-NEXT: .LBB45_6: @ %entry ; SOFT-NEXT: movs r0, #0 -; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: bmi .LBB45_8 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mvns r2, r0 +; SOFT-NEXT: cmp r4, r5 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: blo .LBB45_8 ; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: .LBB45_8: @ %entry -; SOFT-NEXT: movs r1, #1 -; SOFT-NEXT: lsls r1, r1, #31 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: bge .LBB45_10 +; SOFT-NEXT: cmp r4, r5 +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: bne .LBB45_26 ; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bpl .LBB45_27 ; SOFT-NEXT: .LBB45_10: @ %entry -; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: bhi .LBB45_12 -; SOFT-NEXT: @ %bb.11: @ %entry -; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB45_12 +; SOFT-NEXT: .LBB45_11: @ %entry +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB45_12: @ %entry -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: asrs r0, r3, #31 -; SOFT-NEXT: ands r0, r2 -; SOFT-NEXT: ands r0, r7 -; SOFT-NEXT: adds r0, r0, #1 -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: beq .LBB45_14 +; SOFT-NEXT: movs r0, #1 +; SOFT-NEXT: lsls r5, r0, #31 +; SOFT-NEXT: cmp r1, r5 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: bhi .LBB45_14 ; SOFT-NEXT: @ %bb.13: @ %entry -; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r2, r6 ; SOFT-NEXT: .LBB45_14: @ %entry -; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: mvns r0, r0 -; SOFT-NEXT: ldr r2, .LCPI45_0 -; SOFT-NEXT: cmp r4, r2 -; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: blo .LBB45_16 +; SOFT-NEXT: cmp r1, r5 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: beq .LBB45_16 ; SOFT-NEXT: @ %bb.15: @ %entry -; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: .LBB45_16: @ %entry -; SOFT-NEXT: cmp r4, r2 -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: beq .LBB45_18 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: bpl .LBB45_28 ; SOFT-NEXT: @ %bb.17: @ %entry -; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: blt .LBB45_29 ; SOFT-NEXT: .LBB45_18: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB45_20 -; SOFT-NEXT: @ %bb.19: @ %entry -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: beq .LBB45_20 +; SOFT-NEXT: .LBB45_19: +; SOFT-NEXT: asrs r3, r3, #31 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: ands r3, r2 ; SOFT-NEXT: .LBB45_20: @ %entry -; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: ands 
r3, r7 +; SOFT-NEXT: adds r2, r3, #1 ; SOFT-NEXT: beq .LBB45_22 ; SOFT-NEXT: @ %bb.21: @ %entry -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB45_22: @ %entry -; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bhi .LBB45_24 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: blt .LBB45_30 ; SOFT-NEXT: @ %bb.23: @ %entry -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: cmp r1, r5 +; SOFT-NEXT: bls .LBB45_31 ; SOFT-NEXT: .LBB45_24: @ %entry -; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB45_26 -; SOFT-NEXT: @ %bb.25: @ %entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: bne .LBB45_32 +; SOFT-NEXT: .LBB45_25: @ %entry +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB45_26: @ %entry +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bmi .LBB45_10 +; SOFT-NEXT: .LBB45_27: @ %entry +; SOFT-NEXT: mov r6, r2 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bge .LBB45_28 -; SOFT-NEXT: @ %bb.27: @ %entry -; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: bne .LBB45_11 +; SOFT-NEXT: b .LBB45_12 ; SOFT-NEXT: .LBB45_28: @ %entry -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: beq .LBB45_30 -; SOFT-NEXT: @ %bb.29: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bge .LBB45_18 +; SOFT-NEXT: .LBB45_29: @ %entry +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bne .LBB45_19 +; SOFT-NEXT: b .LBB45_20 ; SOFT-NEXT: .LBB45_30: @ %entry -; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: add sp, #20 +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: cmp r1, r5 +; SOFT-NEXT: bhi .LBB45_24 +; SOFT-NEXT: .LBB45_31: @ %entry +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: beq .LBB45_25 +; SOFT-NEXT: .LBB45_32: @ %entry +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.31: +; SOFT-NEXT: @ %bb.33: ; SOFT-NEXT: .LCPI45_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -3231,43 +3244,46 @@ define i64 @stest_f64i64_mm(double %x) { ; VFP2-NEXT: mov.w r5, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r4, r12 -; VFP2-NEXT: orrs.w r7, r2, r3 +; VFP2-NEXT: orrs.w r9, r2, r3 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r4, r1 ; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r5, r3 -; VFP2-NEXT: and.w r2, r2, r3, asr #31 -; VFP2-NEXT: mov.w r1, #-2147483648 ; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: mov.w r6, #-2147483648 -; VFP2-NEXT: and.w r2, r2, r5 +; VFP2-NEXT: mov.w r7, #-2147483648 +; VFP2-NEXT: mov.w r1, #-2147483648 ; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r6, r4 +; VFP2-NEXT: movgt r7, r4 ; VFP2-NEXT: cmp.w r4, #-2147483648 +; VFP2-NEXT: mov r6, r3 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r1, r4 -; VFP2-NEXT: adds r2, #1 +; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r6 +; VFP2-NEXT: andne.w r6, r2, r6, asr #31 +; VFP2-NEXT: and.w r2, r6, r5 ; VFP2-NEXT: mov.w r6, #-1 +; VFP2-NEXT: adds r2, #1 +; VFP2-NEXT: it ne +; VFP2-NEXT: movne r1, r7 +; VFP2-NEXT: mov.w r7, #-1 ; VFP2-NEXT: cmp r12, r8 ; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r6, r0 +; VFP2-NEXT: movlo r7, r0 +; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r6, r0 +; VFP2-NEXT: moveq r7, r0 ; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: mov.w r9, #-1 ; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r0, r9 -; VFP2-NEXT: cmp r7, #0 +; VFP2-NEXT: movpl r0, r6 +; VFP2-NEXT: cmp.w r9, #0 ; 
VFP2-NEXT: mov.w r3, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r6 +; VFP2-NEXT: moveq r0, r7 ; VFP2-NEXT: cmp.w r4, #-2147483648 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r3, r0 -; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r3, r0 ; VFP2-NEXT: cmp.w r5, #-1 @@ -3281,8 +3297,10 @@ define i64 @stest_f64i64_mm(double %x) { ; ; FULL-LABEL: stest_f64i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r6, r7, r8, lr} -; FULL-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; FULL-NEXT: .pad #4 +; FULL-NEXT: sub sp, #4 ; FULL-NEXT: bl __fixdfti ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: cmp r1, r12 @@ -3295,14 +3313,17 @@ define i64 @stest_f64i64_mm(double %x) { ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: mov.w r7, #-2147483648 ; FULL-NEXT: csel r6, r3, lr, mi -; FULL-NEXT: and.w r2, r2, r3, asr #31 +; FULL-NEXT: mov r5, r3 ; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: and.w r2, r2, r6 -; FULL-NEXT: csel r5, r4, r7, gt +; FULL-NEXT: csel r9, r4, r7, gt ; FULL-NEXT: cmp.w r4, #-2147483648 ; FULL-NEXT: csel r7, r4, r7, hi -; FULL-NEXT: adds r2, #1 -; FULL-NEXT: csel r5, r7, r5, eq +; FULL-NEXT: cmp r3, #0 +; FULL-NEXT: it ne +; FULL-NEXT: andne.w r5, r2, r5, asr #31 +; FULL-NEXT: and.w r2, r5, r6 +; FULL-NEXT: adds r5, r2, #1 +; FULL-NEXT: csel r2, r7, r9, eq ; FULL-NEXT: mov.w r7, #-1 ; FULL-NEXT: cmp r1, r12 ; FULL-NEXT: csel r1, r0, r7, lo @@ -3316,10 +3337,11 @@ define i64 @stest_f64i64_mm(double %x) { ; FULL-NEXT: csel r1, r0, r1, eq ; FULL-NEXT: cmp.w r6, #-1 ; FULL-NEXT: csel r0, r0, lr, gt -; FULL-NEXT: cmp r2, #0 +; FULL-NEXT: cmp r5, #0 ; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: mov r1, r5 -; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; FULL-NEXT: mov r1, r2 +; FULL-NEXT: add sp, #4 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3671,6 +3693,7 @@ define i64 @stest_f32i64_mm(float %x) { ; SOFT-NEXT: bl __fixsfti ; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: ldr r0, .LCPI48_0 ; SOFT-NEXT: cmp r1, r0 ; SOFT-NEXT: mov r5, r1 @@ -3678,105 +3701,114 @@ define i64 @stest_f32i64_mm(float %x) { ; SOFT-NEXT: @ %bb.1: @ %entry ; SOFT-NEXT: ldr r5, .LCPI48_0 ; SOFT-NEXT: .LBB48_2: @ %entry -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: cmp r7, #0 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bmi .LBB48_4 ; SOFT-NEXT: @ %bb.3: @ %entry ; SOFT-NEXT: ldr r1, .LCPI48_0 ; SOFT-NEXT: .LBB48_4: @ %entry +; SOFT-NEXT: str r2, [sp] @ 4-byte Spill ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: orrs r0, r3 +; SOFT-NEXT: orrs r0, r7 ; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: beq .LBB48_6 ; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: .LBB48_6: @ %entry -; SOFT-NEXT: movs r0, #0 -; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: mov r2, r7 ; SOFT-NEXT: bmi .LBB48_8 ; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: .LBB48_8: @ %entry ; SOFT-NEXT: movs r1, #1 ; SOFT-NEXT: lsls r1, r1, #31 -; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: bge .LBB48_10 ; SOFT-NEXT: @ %bb.9: @ %entry ; SOFT-NEXT: mov r6, r1 ; SOFT-NEXT: 
.LBB48_10: @ %entry ; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bhi .LBB48_12 ; SOFT-NEXT: @ %bb.11: @ %entry -; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: mov r3, r1 ; SOFT-NEXT: .LBB48_12: @ %entry -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: asrs r0, r3, #31 -; SOFT-NEXT: ands r0, r2 -; SOFT-NEXT: ands r0, r7 -; SOFT-NEXT: adds r0, r0, #1 -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: beq .LBB48_14 +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB48_14 ; SOFT-NEXT: @ %bb.13: @ %entry +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: b .LBB48_15 +; SOFT-NEXT: .LBB48_14: +; SOFT-NEXT: asrs r3, r7, #31 +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: ands r3, r0 +; SOFT-NEXT: .LBB48_15: @ %entry +; SOFT-NEXT: ands r3, r2 +; SOFT-NEXT: adds r0, r3, #1 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: beq .LBB48_17 +; SOFT-NEXT: @ %bb.16: @ %entry ; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: .LBB48_14: @ %entry -; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: mvns r0, r0 -; SOFT-NEXT: ldr r2, .LCPI48_0 -; SOFT-NEXT: cmp r4, r2 -; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: blo .LBB48_16 -; SOFT-NEXT: @ %bb.15: @ %entry -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: .LBB48_16: @ %entry -; SOFT-NEXT: cmp r4, r2 +; SOFT-NEXT: .LBB48_17: @ %entry +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mvns r6, r3 +; SOFT-NEXT: ldr r0, .LCPI48_0 +; SOFT-NEXT: cmp r4, r0 +; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: blo .LBB48_19 +; SOFT-NEXT: @ %bb.18: @ %entry +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: .LBB48_19: @ %entry +; SOFT-NEXT: cmp r4, r0 ; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: beq .LBB48_18 -; SOFT-NEXT: @ %bb.17: @ %entry -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: .LBB48_18: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB48_20 -; SOFT-NEXT: @ %bb.19: @ %entry -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: .LBB48_20: @ %entry +; SOFT-NEXT: beq .LBB48_21 +; SOFT-NEXT: @ %bb.20: @ %entry +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: .LBB48_21: @ %entry +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bmi .LBB48_23 +; SOFT-NEXT: @ %bb.22: @ %entry +; SOFT-NEXT: str r6, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: .LBB48_23: @ %entry ; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB48_22 -; SOFT-NEXT: @ %bb.21: @ %entry +; SOFT-NEXT: beq .LBB48_25 +; SOFT-NEXT: @ %bb.24: @ %entry ; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: .LBB48_22: @ %entry +; SOFT-NEXT: .LBB48_25: @ %entry ; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bhi .LBB48_24 -; SOFT-NEXT: @ %bb.23: @ %entry -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB48_24: @ %entry +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bhi .LBB48_27 +; SOFT-NEXT: @ %bb.26: @ %entry +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: .LBB48_27: @ %entry ; SOFT-NEXT: cmp r5, r1 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB48_26 -; SOFT-NEXT: @ %bb.25: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: .LBB48_26: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bge .LBB48_28 -; SOFT-NEXT: @ %bb.27: @ %entry +; SOFT-NEXT: beq .LBB48_29 +; SOFT-NEXT: @ %bb.28: @ %entry +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: .LBB48_29: @ %entry +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: bge .LBB48_31 +; SOFT-NEXT: @ %bb.30: @ %entry ; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB48_28: @ %entry 
+; SOFT-NEXT: .LBB48_31: @ %entry ; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: beq .LBB48_30 -; SOFT-NEXT: @ %bb.29: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB48_30: @ %entry ; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: beq .LBB48_33 +; SOFT-NEXT: @ %bb.32: @ %entry +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: .LBB48_33: @ %entry ; SOFT-NEXT: add sp, #20 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.31: +; SOFT-NEXT: @ %bb.34: ; SOFT-NEXT: .LCPI48_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -3798,43 +3830,46 @@ define i64 @stest_f32i64_mm(float %x) { ; VFP2-NEXT: mov.w r5, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r4, r12 -; VFP2-NEXT: orrs.w r7, r2, r3 +; VFP2-NEXT: orrs.w r9, r2, r3 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r4, r1 ; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r5, r3 -; VFP2-NEXT: and.w r2, r2, r3, asr #31 -; VFP2-NEXT: mov.w r1, #-2147483648 ; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: mov.w r6, #-2147483648 -; VFP2-NEXT: and.w r2, r2, r5 +; VFP2-NEXT: mov.w r7, #-2147483648 +; VFP2-NEXT: mov.w r1, #-2147483648 ; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r6, r4 +; VFP2-NEXT: movgt r7, r4 ; VFP2-NEXT: cmp.w r4, #-2147483648 +; VFP2-NEXT: mov r6, r3 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r1, r4 -; VFP2-NEXT: adds r2, #1 +; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r6 +; VFP2-NEXT: andne.w r6, r2, r6, asr #31 +; VFP2-NEXT: and.w r2, r6, r5 ; VFP2-NEXT: mov.w r6, #-1 +; VFP2-NEXT: adds r2, #1 +; VFP2-NEXT: it ne +; VFP2-NEXT: movne r1, r7 +; VFP2-NEXT: mov.w r7, #-1 ; VFP2-NEXT: cmp r12, r8 ; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r6, r0 +; VFP2-NEXT: movlo r7, r0 +; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r6, r0 +; VFP2-NEXT: moveq r7, r0 ; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: mov.w r9, #-1 ; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r0, r9 -; VFP2-NEXT: cmp r7, #0 +; VFP2-NEXT: movpl r0, r6 +; VFP2-NEXT: cmp.w r9, #0 ; VFP2-NEXT: mov.w r3, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r6 +; VFP2-NEXT: moveq r0, r7 ; VFP2-NEXT: cmp.w r4, #-2147483648 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r3, r0 -; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r3, r0 ; VFP2-NEXT: cmp.w r5, #-1 @@ -3848,8 +3883,10 @@ define i64 @stest_f32i64_mm(float %x) { ; ; FULL-LABEL: stest_f32i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r6, r7, r8, lr} -; FULL-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; FULL-NEXT: .pad #4 +; FULL-NEXT: sub sp, #4 ; FULL-NEXT: bl __fixsfti ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: cmp r1, r12 @@ -3862,14 +3899,17 @@ define i64 @stest_f32i64_mm(float %x) { ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: mov.w r7, #-2147483648 ; FULL-NEXT: csel r6, r3, lr, mi -; FULL-NEXT: and.w r2, r2, r3, asr #31 +; FULL-NEXT: mov r5, r3 ; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: and.w r2, r2, r6 -; FULL-NEXT: csel r5, r4, r7, gt +; FULL-NEXT: csel r9, r4, r7, gt ; FULL-NEXT: cmp.w r4, #-2147483648 ; FULL-NEXT: csel r7, r4, r7, hi -; FULL-NEXT: adds r2, #1 -; FULL-NEXT: csel r5, r7, r5, eq +; FULL-NEXT: cmp r3, #0 +; FULL-NEXT: it ne +; FULL-NEXT: andne.w r5, r2, r5, asr #31 +; FULL-NEXT: and.w r2, r5, r6 +; FULL-NEXT: adds r5, r2, #1 +; FULL-NEXT: csel r2, r7, r9, eq ; FULL-NEXT: mov.w r7, #-1 ; FULL-NEXT: cmp r1, r12 ; FULL-NEXT: csel r1, r0, r7, lo @@ -3883,10 +3923,11 @@ define i64 @stest_f32i64_mm(float %x) { ; FULL-NEXT: csel 
r1, r0, r1, eq ; FULL-NEXT: cmp.w r6, #-1 ; FULL-NEXT: csel r0, r0, lr, gt -; FULL-NEXT: cmp r2, #0 +; FULL-NEXT: cmp r5, #0 ; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: mov r1, r5 -; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; FULL-NEXT: mov r1, r2 +; FULL-NEXT: add sp, #4 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %conv = fptosi float %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -4234,6 +4275,7 @@ define i64 @stest_f16i64_mm(half %x) { ; SOFT-NEXT: bl __fixsfti ; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: ldr r0, .LCPI51_0 ; SOFT-NEXT: cmp r1, r0 ; SOFT-NEXT: mov r5, r1 @@ -4241,105 +4283,114 @@ define i64 @stest_f16i64_mm(half %x) { ; SOFT-NEXT: @ %bb.1: @ %entry ; SOFT-NEXT: ldr r5, .LCPI51_0 ; SOFT-NEXT: .LBB51_2: @ %entry -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: cmp r7, #0 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bmi .LBB51_4 ; SOFT-NEXT: @ %bb.3: @ %entry ; SOFT-NEXT: ldr r1, .LCPI51_0 ; SOFT-NEXT: .LBB51_4: @ %entry +; SOFT-NEXT: str r2, [sp] @ 4-byte Spill ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: orrs r0, r3 +; SOFT-NEXT: orrs r0, r7 ; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: beq .LBB51_6 ; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: .LBB51_6: @ %entry -; SOFT-NEXT: movs r0, #0 -; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: mov r2, r7 ; SOFT-NEXT: bmi .LBB51_8 ; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: .LBB51_8: @ %entry ; SOFT-NEXT: movs r1, #1 ; SOFT-NEXT: lsls r1, r1, #31 -; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: bge .LBB51_10 ; SOFT-NEXT: @ %bb.9: @ %entry ; SOFT-NEXT: mov r6, r1 ; SOFT-NEXT: .LBB51_10: @ %entry ; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bhi .LBB51_12 ; SOFT-NEXT: @ %bb.11: @ %entry -; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: mov r3, r1 ; SOFT-NEXT: .LBB51_12: @ %entry -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: asrs r0, r3, #31 -; SOFT-NEXT: ands r0, r2 -; SOFT-NEXT: ands r0, r7 -; SOFT-NEXT: adds r0, r0, #1 -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: beq .LBB51_14 +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB51_14 ; SOFT-NEXT: @ %bb.13: @ %entry +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: b .LBB51_15 +; SOFT-NEXT: .LBB51_14: +; SOFT-NEXT: asrs r3, r7, #31 +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: ands r3, r0 +; SOFT-NEXT: .LBB51_15: @ %entry +; SOFT-NEXT: ands r3, r2 +; SOFT-NEXT: adds r0, r3, #1 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: beq .LBB51_17 +; SOFT-NEXT: @ %bb.16: @ %entry ; SOFT-NEXT: str r6, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: .LBB51_14: @ %entry -; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: mvns r0, r0 -; SOFT-NEXT: ldr r2, .LCPI51_0 -; SOFT-NEXT: cmp r4, r2 -; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: blo .LBB51_16 -; SOFT-NEXT: @ %bb.15: @ %entry -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: .LBB51_16: @ %entry -; SOFT-NEXT: cmp r4, r2 +; SOFT-NEXT: .LBB51_17: @ %entry +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mvns r6, r3 +; SOFT-NEXT: ldr r0, .LCPI51_0 +; SOFT-NEXT: cmp r4, r0 +; SOFT-NEXT: ldr r3, 
[sp, #16] @ 4-byte Reload +; SOFT-NEXT: blo .LBB51_19 +; SOFT-NEXT: @ %bb.18: @ %entry +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: .LBB51_19: @ %entry +; SOFT-NEXT: cmp r4, r0 ; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: beq .LBB51_18 -; SOFT-NEXT: @ %bb.17: @ %entry -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: .LBB51_18: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB51_20 -; SOFT-NEXT: @ %bb.19: @ %entry -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: .LBB51_20: @ %entry +; SOFT-NEXT: beq .LBB51_21 +; SOFT-NEXT: @ %bb.20: @ %entry +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: .LBB51_21: @ %entry +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bmi .LBB51_23 +; SOFT-NEXT: @ %bb.22: @ %entry +; SOFT-NEXT: str r6, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: .LBB51_23: @ %entry ; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB51_22 -; SOFT-NEXT: @ %bb.21: @ %entry +; SOFT-NEXT: beq .LBB51_25 +; SOFT-NEXT: @ %bb.24: @ %entry ; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: .LBB51_22: @ %entry +; SOFT-NEXT: .LBB51_25: @ %entry ; SOFT-NEXT: cmp r5, r1 -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bhi .LBB51_24 -; SOFT-NEXT: @ %bb.23: @ %entry -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB51_24: @ %entry +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bhi .LBB51_27 +; SOFT-NEXT: @ %bb.26: @ %entry +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: .LBB51_27: @ %entry ; SOFT-NEXT: cmp r5, r1 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB51_26 -; SOFT-NEXT: @ %bb.25: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: .LBB51_26: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bge .LBB51_28 -; SOFT-NEXT: @ %bb.27: @ %entry +; SOFT-NEXT: beq .LBB51_29 +; SOFT-NEXT: @ %bb.28: @ %entry +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: .LBB51_29: @ %entry +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: bge .LBB51_31 +; SOFT-NEXT: @ %bb.30: @ %entry ; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB51_28: @ %entry +; SOFT-NEXT: .LBB51_31: @ %entry ; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: beq .LBB51_30 -; SOFT-NEXT: @ %bb.29: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB51_30: @ %entry ; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: beq .LBB51_33 +; SOFT-NEXT: @ %bb.32: @ %entry +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: .LBB51_33: @ %entry ; SOFT-NEXT: add sp, #20 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.31: +; SOFT-NEXT: @ %bb.34: ; SOFT-NEXT: .LCPI51_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -4364,43 +4415,46 @@ define i64 @stest_f16i64_mm(half %x) { ; VFP2-NEXT: mov.w r5, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r4, r12 -; VFP2-NEXT: orrs.w r7, r2, r3 +; VFP2-NEXT: orrs.w r9, r2, r3 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r4, r1 ; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r5, r3 -; VFP2-NEXT: and.w r2, r2, r3, asr #31 -; VFP2-NEXT: mov.w r1, #-2147483648 ; VFP2-NEXT: cmp.w r5, #-1 -; VFP2-NEXT: mov.w r6, #-2147483648 -; VFP2-NEXT: and.w r2, r2, r5 +; VFP2-NEXT: mov.w r7, #-2147483648 +; VFP2-NEXT: mov.w r1, #-2147483648 ; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r6, r4 +; VFP2-NEXT: movgt r7, r4 ; VFP2-NEXT: cmp.w r4, #-2147483648 +; VFP2-NEXT: mov r6, r3 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r1, r4 -; VFP2-NEXT: adds r2, #1 +; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r6 +; VFP2-NEXT: andne.w r6, r2, r6, asr #31 +; VFP2-NEXT: and.w r2, r6, r5 ; VFP2-NEXT: mov.w r6, #-1 +; VFP2-NEXT: adds r2, #1 +; 
VFP2-NEXT: it ne +; VFP2-NEXT: movne r1, r7 +; VFP2-NEXT: mov.w r7, #-1 ; VFP2-NEXT: cmp r12, r8 ; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r6, r0 +; VFP2-NEXT: movlo r7, r0 +; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r6, r0 +; VFP2-NEXT: moveq r7, r0 ; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: mov.w r9, #-1 ; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r0, r9 -; VFP2-NEXT: cmp r7, #0 +; VFP2-NEXT: movpl r0, r6 +; VFP2-NEXT: cmp.w r9, #0 ; VFP2-NEXT: mov.w r3, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r6 +; VFP2-NEXT: moveq r0, r7 ; VFP2-NEXT: cmp.w r4, #-2147483648 ; VFP2-NEXT: it hi ; VFP2-NEXT: movhi r3, r0 -; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r3, r0 ; VFP2-NEXT: cmp.w r5, #-1 @@ -4414,8 +4468,10 @@ define i64 @stest_f16i64_mm(half %x) { ; ; FULL-LABEL: stest_f16i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r6, r7, r8, lr} -; FULL-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; FULL-NEXT: .pad #4 +; FULL-NEXT: sub sp, #4 ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti @@ -4430,14 +4486,17 @@ define i64 @stest_f16i64_mm(half %x) { ; FULL-NEXT: cmp r3, #0 ; FULL-NEXT: mov.w r7, #-2147483648 ; FULL-NEXT: csel r6, r3, lr, mi -; FULL-NEXT: and.w r2, r2, r3, asr #31 +; FULL-NEXT: mov r5, r3 ; FULL-NEXT: cmp.w r6, #-1 -; FULL-NEXT: and.w r2, r2, r6 -; FULL-NEXT: csel r5, r4, r7, gt +; FULL-NEXT: csel r9, r4, r7, gt ; FULL-NEXT: cmp.w r4, #-2147483648 ; FULL-NEXT: csel r7, r4, r7, hi -; FULL-NEXT: adds r2, #1 -; FULL-NEXT: csel r5, r7, r5, eq +; FULL-NEXT: cmp r3, #0 +; FULL-NEXT: it ne +; FULL-NEXT: andne.w r5, r2, r5, asr #31 +; FULL-NEXT: and.w r2, r5, r6 +; FULL-NEXT: adds r5, r2, #1 +; FULL-NEXT: csel r2, r7, r9, eq ; FULL-NEXT: mov.w r7, #-1 ; FULL-NEXT: cmp r1, r12 ; FULL-NEXT: csel r1, r0, r7, lo @@ -4451,10 +4510,11 @@ define i64 @stest_f16i64_mm(half %x) { ; FULL-NEXT: csel r1, r0, r1, eq ; FULL-NEXT: cmp.w r6, #-1 ; FULL-NEXT: csel r0, r0, lr, gt -; FULL-NEXT: cmp r2, #0 +; FULL-NEXT: cmp r5, #0 ; FULL-NEXT: csel r0, r1, r0, eq -; FULL-NEXT: mov r1, r5 -; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; FULL-NEXT: mov r1, r2 +; FULL-NEXT: add sp, #4 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index db6f331..a4d470b 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -3671,93 +3671,95 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: movlo r0, r1 ; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: andne r0, r2, r0, asr #31 ; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: movmi r5, r1 -; CHECK-NEXT: orrs r1, r2, r3 -; CHECK-NEXT: mov r8, #0 -; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: movmi r10, r3 +; CHECK-NEXT: and r1, r0, r10 +; CHECK-NEXT: cmn r11, #-2147483647 +; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: movlo r0, r11 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: and r0, r2, r3, asr #31 -; CHECK-NEXT: movmi r8, r3 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: 
and r1, r0, r8 +; CHECK-NEXT: mvn r8, #-2147483648 ; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: cmn r8, #1 +; CHECK-NEXT: movmi r8, r11 +; CHECK-NEXT: orrs r2, r2, r3 +; CHECK-NEXT: moveq r8, r0 +; CHECK-NEXT: cmn r10, #1 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: mov r10, #-2147483648 -; CHECK-NEXT: movgt r0, r5 -; CHECK-NEXT: cmp r5, #-2147483648 -; CHECK-NEXT: movhi r10, r5 +; CHECK-NEXT: mov r9, #-2147483648 +; CHECK-NEXT: movgt r0, r8 +; CHECK-NEXT: cmp r8, #-2147483648 +; CHECK-NEXT: movhi r9, r8 ; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r9, r3 +; CHECK-NEXT: mov r6, r3 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: movne r10, r0 +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: movne r9, r0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: movlo r6, r0 +; CHECK-NEXT: mvn r5, #0 +; CHECK-NEXT: movlo r5, r0 ; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: moveq r6, r0 +; CHECK-NEXT: moveq r5, r0 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: movpl r0, r4 ; CHECK-NEXT: orrs r12, r2, r3 -; CHECK-NEXT: moveq r0, r6 +; CHECK-NEXT: moveq r0, r5 ; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: and r2, r2, r3, asr #31 -; CHECK-NEXT: movlo r6, r1 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: movlo r5, r1 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: movmi r7, r1 ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: moveq r7, r6 +; CHECK-NEXT: moveq r7, r5 ; CHECK-NEXT: cmp r7, #-2147483648 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: movhi r1, r0 ; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: movhi r12, r0 +; CHECK-NEXT: moveq r1, r0 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: moveq r12, r0 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: movmi r6, r1 +; CHECK-NEXT: movmi r6, r5 ; CHECK-NEXT: cmn r11, #-2147483647 -; CHECK-NEXT: movlo r4, r1 -; CHECK-NEXT: moveq r4, r1 -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: movlo r4, r5 +; CHECK-NEXT: moveq r4, r5 +; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: movne r4, r6 -; CHECK-NEXT: cmp r5, #-2147483648 +; CHECK-NEXT: cmp r8, #-2147483648 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movhi r6, r4 ; CHECK-NEXT: moveq r6, r4 -; CHECK-NEXT: cmn r8, #1 -; CHECK-NEXT: movle r4, r5 -; CHECK-NEXT: cmn r1, #1 +; CHECK-NEXT: cmn r10, #1 +; CHECK-NEXT: movle r4, r12 +; CHECK-NEXT: cmn r5, #1 ; CHECK-NEXT: moveq r4, r6 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: vmov.32 d1[0], r4 ; CHECK-NEXT: movmi r6, r3 ; CHECK-NEXT: cmn r6, #1 -; CHECK-NEXT: and r2, r2, r6 -; CHECK-NEXT: movle r0, r5 +; CHECK-NEXT: movle r0, r12 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: andne r3, r2, r3, asr #31 +; CHECK-NEXT: and r2, r3, r6 ; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: mov r1, #-2147483648 -; CHECK-NEXT: moveq r0, r12 +; CHECK-NEXT: moveq r0, r1 ; CHECK-NEXT: cmn r6, #1 +; CHECK-NEXT: mov r1, #-2147483648 ; CHECK-NEXT: vmov.32 d0[0], r0 ; CHECK-NEXT: movgt r1, r7 ; CHECK-NEXT: cmp r7, #-2147483648 ; CHECK-NEXT: mov r0, #-2147483648 +; CHECK-NEXT: vmov.32 d1[1], r9 ; CHECK-NEXT: movls r7, r0 ; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: vmov.32 d1[1], r10 ; CHECK-NEXT: movne r7, r1 ; CHECK-NEXT: 
vmov.32 d0[1], r7 ; CHECK-NEXT: add sp, sp, #16 @@ -3945,93 +3947,95 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: movlo r0, r1 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: vmov.f32 s0, s16 +; CHECK-NEXT: andne r0, r2, r0, asr #31 ; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: movmi r5, r1 -; CHECK-NEXT: orrs r1, r2, r3 -; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: movmi r10, r3 +; CHECK-NEXT: and r1, r0, r10 +; CHECK-NEXT: cmn r11, #-2147483647 +; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: mvn r8, #-2147483648 +; CHECK-NEXT: movlo r0, r11 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r8, #0 -; CHECK-NEXT: and r0, r2, r3, asr #31 -; CHECK-NEXT: movmi r8, r3 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: and r1, r0, r8 -; CHECK-NEXT: cmn r8, #1 +; CHECK-NEXT: movmi r8, r11 +; CHECK-NEXT: orrs r2, r2, r3 +; CHECK-NEXT: moveq r8, r0 +; CHECK-NEXT: cmn r10, #1 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: mov r10, #-2147483648 -; CHECK-NEXT: movgt r0, r5 -; CHECK-NEXT: cmp r5, #-2147483648 -; CHECK-NEXT: movhi r10, r5 +; CHECK-NEXT: mov r9, #-2147483648 +; CHECK-NEXT: movgt r0, r8 +; CHECK-NEXT: cmp r8, #-2147483648 +; CHECK-NEXT: movhi r9, r8 ; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r9, r3 +; CHECK-NEXT: mov r6, r3 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: movne r10, r0 +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: movne r9, r0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: movlo r6, r0 +; CHECK-NEXT: mvn r5, #0 +; CHECK-NEXT: movlo r5, r0 ; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: moveq r6, r0 +; CHECK-NEXT: moveq r5, r0 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: movpl r0, r4 ; CHECK-NEXT: orrs r12, r2, r3 -; CHECK-NEXT: moveq r0, r6 +; CHECK-NEXT: moveq r0, r5 ; CHECK-NEXT: cmn r1, #-2147483647 -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: and r2, r2, r3, asr #31 -; CHECK-NEXT: movlo r6, r1 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: movlo r5, r1 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: movmi r7, r1 ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: moveq r7, r6 +; CHECK-NEXT: moveq r7, r5 ; CHECK-NEXT: cmp r7, #-2147483648 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: movhi r1, r0 ; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: movhi r12, r0 +; CHECK-NEXT: moveq r1, r0 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: moveq r12, r0 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: movmi r6, r1 +; CHECK-NEXT: movmi r6, r5 ; CHECK-NEXT: cmn r11, #-2147483647 -; CHECK-NEXT: movlo r4, r1 -; CHECK-NEXT: moveq r4, r1 -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: movlo r4, r5 +; CHECK-NEXT: moveq r4, r5 +; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: movne r4, r6 -; CHECK-NEXT: cmp r5, #-2147483648 +; CHECK-NEXT: cmp r8, #-2147483648 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movhi r6, r4 ; CHECK-NEXT: moveq r6, r4 -; 
CHECK-NEXT: cmn r8, #1 -; CHECK-NEXT: movle r4, r5 -; CHECK-NEXT: cmn r1, #1 +; CHECK-NEXT: cmn r10, #1 +; CHECK-NEXT: movle r4, r12 +; CHECK-NEXT: cmn r5, #1 ; CHECK-NEXT: moveq r4, r6 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: vmov.32 d1[0], r4 ; CHECK-NEXT: movmi r6, r3 ; CHECK-NEXT: cmn r6, #1 -; CHECK-NEXT: and r2, r2, r6 -; CHECK-NEXT: movle r0, r5 +; CHECK-NEXT: movle r0, r12 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: andne r3, r2, r3, asr #31 +; CHECK-NEXT: and r2, r3, r6 ; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: mov r1, #-2147483648 -; CHECK-NEXT: moveq r0, r12 +; CHECK-NEXT: moveq r0, r1 ; CHECK-NEXT: cmn r6, #1 +; CHECK-NEXT: mov r1, #-2147483648 ; CHECK-NEXT: vmov.32 d0[0], r0 ; CHECK-NEXT: movgt r1, r7 ; CHECK-NEXT: cmp r7, #-2147483648 ; CHECK-NEXT: mov r0, #-2147483648 +; CHECK-NEXT: vmov.32 d1[1], r9 ; CHECK-NEXT: movls r7, r0 ; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: vmov.32 d1[1], r10 ; CHECK-NEXT: movne r7, r1 ; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: add sp, sp, #16 @@ -4220,100 +4224,100 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: cmn r1, #-2147483647 -; CHECK-NEON-NEXT: mvn r0, #-2147483648 -; CHECK-NEON-NEXT: mvn r11, #-2147483648 -; CHECK-NEON-NEXT: movlo r0, r1 +; CHECK-NEON-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEON-NEXT: movmi r11, r1 -; CHECK-NEON-NEXT: orrs r1, r2, r3 -; CHECK-NEON-NEXT: mov r8, #0 -; CHECK-NEON-NEXT: moveq r11, r0 +; CHECK-NEON-NEXT: mov r0, r3 +; CHECK-NEON-NEXT: mov r10, #0 +; CHECK-NEON-NEXT: andne r0, r2, r0, asr #31 +; CHECK-NEON-NEXT: mov r11, r1 +; CHECK-NEON-NEXT: movmi r10, r3 +; CHECK-NEON-NEXT: and r1, r0, r10 +; CHECK-NEON-NEXT: cmn r11, #-2147483647 +; CHECK-NEON-NEXT: mvn r0, #-2147483648 +; CHECK-NEON-NEXT: movlo r0, r11 ; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: and r0, r2, r3, asr #31 -; CHECK-NEON-NEXT: movmi r8, r3 -; CHECK-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill -; CHECK-NEON-NEXT: and r1, r0, r8 -; CHECK-NEON-NEXT: cmn r8, #1 +; CHECK-NEON-NEXT: mvn r8, #-2147483648 +; CHECK-NEON-NEXT: mov r9, #-2147483648 +; CHECK-NEON-NEXT: movmi r8, r11 +; CHECK-NEON-NEXT: orrs r2, r2, r3 +; CHECK-NEON-NEXT: moveq r8, r0 +; CHECK-NEON-NEXT: cmn r10, #1 ; CHECK-NEON-NEXT: mov r0, #-2147483648 -; CHECK-NEON-NEXT: movgt r0, r11 -; CHECK-NEON-NEXT: cmp r11, #-2147483648 -; CHECK-NEON-NEXT: mov r2, #-2147483648 -; CHECK-NEON-NEXT: mov r9, r3 -; CHECK-NEON-NEXT: movhi r2, r11 +; CHECK-NEON-NEXT: mov r6, r3 +; CHECK-NEON-NEXT: movgt r0, r8 +; CHECK-NEON-NEXT: cmp r8, #-2147483648 +; CHECK-NEON-NEXT: movhi r9, r8 ; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: movne r2, r0 +; CHECK-NEON-NEXT: movne r9, r0 ; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: mvn r10, #-2147483648 -; CHECK-NEON-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEON-NEXT: mvn r7, #-2147483648 ; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti ; CHECK-NEON-NEXT: cmn r1, #-2147483647 -; CHECK-NEON-NEXT: mvn r6, #0 -; CHECK-NEON-NEXT: movlo r6, r0 +; CHECK-NEON-NEXT: mvn r5, #0 +; CHECK-NEON-NEXT: movlo r5, r0 ; CHECK-NEON-NEXT: mvn r4, #0 -; CHECK-NEON-NEXT: moveq r6, r0 +; CHECK-NEON-NEXT: moveq r5, r0 ; CHECK-NEON-NEXT: cmp r3, #0 ; CHECK-NEON-NEXT: 
movpl r0, r4 ; CHECK-NEON-NEXT: orrs r12, r2, r3 -; CHECK-NEON-NEXT: moveq r0, r6 +; CHECK-NEON-NEXT: moveq r0, r5 ; CHECK-NEON-NEXT: cmn r1, #-2147483647 -; CHECK-NEON-NEXT: mvn r6, #-2147483648 -; CHECK-NEON-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEON-NEXT: movlo r6, r1 +; CHECK-NEON-NEXT: mvn r5, #-2147483648 +; CHECK-NEON-NEXT: movlo r5, r1 ; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: movmi r10, r1 +; CHECK-NEON-NEXT: movmi r7, r1 ; CHECK-NEON-NEXT: cmp r12, #0 -; CHECK-NEON-NEXT: moveq r10, r6 -; CHECK-NEON-NEXT: cmp r10, #-2147483648 +; CHECK-NEON-NEXT: moveq r7, r5 +; CHECK-NEON-NEXT: cmp r7, #-2147483648 ; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: mvn r6, #0 +; CHECK-NEON-NEXT: ldr r5, [sp, #12] @ 4-byte Reload ; CHECK-NEON-NEXT: movhi r1, r0 -; CHECK-NEON-NEXT: and r2, r2, r3, asr #31 +; CHECK-NEON-NEXT: mov r12, #0 ; CHECK-NEON-NEXT: moveq r1, r0 -; CHECK-NEON-NEXT: cmp r9, #0 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: mvn r6, #0 ; CHECK-NEON-NEXT: movmi r6, r5 -; CHECK-NEON-NEXT: cmn r7, #-2147483647 +; CHECK-NEON-NEXT: cmn r11, #-2147483647 ; CHECK-NEON-NEXT: movlo r4, r5 -; CHECK-NEON-NEXT: ldr r7, [sp] @ 4-byte Reload ; CHECK-NEON-NEXT: moveq r4, r5 -; CHECK-NEON-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEON-NEXT: ldr r5, [sp, #4] @ 4-byte Reload ; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: mov r5, #0 +; CHECK-NEON-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEON-NEXT: movne r4, r6 -; CHECK-NEON-NEXT: cmp r11, #-2147483648 +; CHECK-NEON-NEXT: cmp r8, #-2147483648 ; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: movhi r6, r4 ; CHECK-NEON-NEXT: moveq r6, r4 -; CHECK-NEON-NEXT: cmn r8, #1 -; CHECK-NEON-NEXT: movle r4, r5 -; CHECK-NEON-NEXT: cmn r7, #1 +; CHECK-NEON-NEXT: cmn r10, #1 +; CHECK-NEON-NEXT: movle r4, r12 +; CHECK-NEON-NEXT: cmn r5, #1 ; CHECK-NEON-NEXT: moveq r4, r6 ; CHECK-NEON-NEXT: cmp r3, #0 ; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: vmov.32 d1[0], r4 ; CHECK-NEON-NEXT: movmi r6, r3 ; CHECK-NEON-NEXT: cmn r6, #1 -; CHECK-NEON-NEXT: and r2, r2, r6 -; CHECK-NEON-NEXT: movle r0, r5 +; CHECK-NEON-NEXT: movle r0, r12 +; CHECK-NEON-NEXT: cmp r3, #0 +; CHECK-NEON-NEXT: andne r3, r2, r3, asr #31 +; CHECK-NEON-NEXT: and r2, r3, r6 ; CHECK-NEON-NEXT: cmn r2, #1 ; CHECK-NEON-NEXT: moveq r0, r1 ; CHECK-NEON-NEXT: cmn r6, #1 ; CHECK-NEON-NEXT: mov r1, #-2147483648 ; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: movgt r1, r10 -; CHECK-NEON-NEXT: cmp r10, #-2147483648 +; CHECK-NEON-NEXT: movgt r1, r7 +; CHECK-NEON-NEXT: cmp r7, #-2147483648 ; CHECK-NEON-NEXT: mov r0, #-2147483648 -; CHECK-NEON-NEXT: movls r10, r0 -; CHECK-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEON-NEXT: vmov.32 d1[1], r9 +; CHECK-NEON-NEXT: movls r7, r0 ; CHECK-NEON-NEXT: cmn r2, #1 -; CHECK-NEON-NEXT: movne r10, r1 -; CHECK-NEON-NEXT: vmov.32 d1[1], r0 -; CHECK-NEON-NEXT: vmov.32 d0[1], r10 +; CHECK-NEON-NEXT: movne r7, r1 +; CHECK-NEON-NEXT: vmov.32 d0[1], r7 ; CHECK-NEON-NEXT: add sp, sp, #16 ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: add sp, sp, #4 @@ -4334,94 +4338,96 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti ; CHECK-FP16-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-FP16-NEXT: cmn r1, #-2147483647 -; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: mvn r5, #-2147483648 -; CHECK-FP16-NEXT: movlo r0, r1 ; CHECK-FP16-NEXT: cmp r3, #0 +; CHECK-FP16-NEXT: mov r0, r3 +; CHECK-FP16-NEXT: mov r10, #0 +; CHECK-FP16-NEXT: andne r0, r2, r0, asr #31 ; 
CHECK-FP16-NEXT: mov r11, r1 -; CHECK-FP16-NEXT: movmi r5, r1 -; CHECK-FP16-NEXT: orrs r1, r2, r3 -; CHECK-FP16-NEXT: mov r8, #0 -; CHECK-FP16-NEXT: moveq r5, r0 +; CHECK-FP16-NEXT: movmi r10, r3 +; CHECK-FP16-NEXT: and r1, r0, r10 +; CHECK-FP16-NEXT: cmn r11, #-2147483647 +; CHECK-FP16-NEXT: mvn r0, #-2147483648 +; CHECK-FP16-NEXT: movlo r0, r11 ; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: and r0, r2, r3, asr #31 -; CHECK-FP16-NEXT: movmi r8, r3 -; CHECK-FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-FP16-NEXT: and r1, r0, r8 -; CHECK-FP16-NEXT: cmn r8, #1 +; CHECK-FP16-NEXT: mvn r8, #-2147483648 +; CHECK-FP16-NEXT: mov r9, #-2147483648 +; CHECK-FP16-NEXT: movmi r8, r11 +; CHECK-FP16-NEXT: orrs r2, r2, r3 +; CHECK-FP16-NEXT: moveq r8, r0 +; CHECK-FP16-NEXT: cmn r10, #1 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: movgt r0, r5 -; CHECK-FP16-NEXT: cmp r5, #-2147483648 -; CHECK-FP16-NEXT: mov r10, #-2147483648 -; CHECK-FP16-NEXT: mov r9, r3 -; CHECK-FP16-NEXT: movhi r10, r5 +; CHECK-FP16-NEXT: mov r6, r3 +; CHECK-FP16-NEXT: movgt r0, r8 +; CHECK-FP16-NEXT: cmp r8, #-2147483648 +; CHECK-FP16-NEXT: movhi r9, r8 ; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: movne r10, r0 +; CHECK-FP16-NEXT: movne r9, r0 ; CHECK-FP16-NEXT: vmov.u16 r0, d8[0] +; CHECK-FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill ; CHECK-FP16-NEXT: mvn r7, #-2147483648 -; CHECK-FP16-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-FP16-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti ; CHECK-FP16-NEXT: cmn r1, #-2147483647 -; CHECK-FP16-NEXT: mvn r6, #0 -; CHECK-FP16-NEXT: movlo r6, r0 +; CHECK-FP16-NEXT: mvn r5, #0 +; CHECK-FP16-NEXT: movlo r5, r0 ; CHECK-FP16-NEXT: mvn r4, #0 -; CHECK-FP16-NEXT: moveq r6, r0 +; CHECK-FP16-NEXT: moveq r5, r0 ; CHECK-FP16-NEXT: cmp r3, #0 ; CHECK-FP16-NEXT: movpl r0, r4 ; CHECK-FP16-NEXT: orrs r12, r2, r3 -; CHECK-FP16-NEXT: moveq r0, r6 +; CHECK-FP16-NEXT: moveq r0, r5 ; CHECK-FP16-NEXT: cmn r1, #-2147483647 -; CHECK-FP16-NEXT: mvn r6, #-2147483648 -; CHECK-FP16-NEXT: and r2, r2, r3, asr #31 -; CHECK-FP16-NEXT: movlo r6, r1 +; CHECK-FP16-NEXT: mvn r5, #-2147483648 +; CHECK-FP16-NEXT: movlo r5, r1 ; CHECK-FP16-NEXT: cmp r3, #0 ; CHECK-FP16-NEXT: movmi r7, r1 ; CHECK-FP16-NEXT: cmp r12, #0 -; CHECK-FP16-NEXT: moveq r7, r6 +; CHECK-FP16-NEXT: moveq r7, r5 ; CHECK-FP16-NEXT: cmp r7, #-2147483648 +; CHECK-FP16-NEXT: mov r1, #0 +; CHECK-FP16-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-FP16-NEXT: movhi r1, r0 ; CHECK-FP16-NEXT: mov r12, #0 -; CHECK-FP16-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-FP16-NEXT: movhi r12, r0 +; CHECK-FP16-NEXT: moveq r1, r0 +; CHECK-FP16-NEXT: cmp r6, #0 ; CHECK-FP16-NEXT: mvn r6, #0 -; CHECK-FP16-NEXT: moveq r12, r0 -; CHECK-FP16-NEXT: cmp r9, #0 -; CHECK-FP16-NEXT: movmi r6, r1 +; CHECK-FP16-NEXT: movmi r6, r5 ; CHECK-FP16-NEXT: cmn r11, #-2147483647 -; CHECK-FP16-NEXT: movlo r4, r1 -; CHECK-FP16-NEXT: moveq r4, r1 -; CHECK-FP16-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-FP16-NEXT: movlo r4, r5 +; CHECK-FP16-NEXT: moveq r4, r5 +; CHECK-FP16-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; CHECK-FP16-NEXT: cmp r5, #0 +; CHECK-FP16-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-FP16-NEXT: movne r4, r6 -; CHECK-FP16-NEXT: cmp r5, #-2147483648 +; CHECK-FP16-NEXT: cmp r8, #-2147483648 ; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: mov r5, #0 ; CHECK-FP16-NEXT: movhi r6, r4 ; CHECK-FP16-NEXT: moveq r6, r4 -; 
CHECK-FP16-NEXT: cmn r8, #1 -; CHECK-FP16-NEXT: movle r4, r5 -; CHECK-FP16-NEXT: cmn r1, #1 +; CHECK-FP16-NEXT: cmn r10, #1 +; CHECK-FP16-NEXT: movle r4, r12 +; CHECK-FP16-NEXT: cmn r5, #1 ; CHECK-FP16-NEXT: moveq r4, r6 ; CHECK-FP16-NEXT: cmp r3, #0 ; CHECK-FP16-NEXT: mov r6, #0 ; CHECK-FP16-NEXT: vmov.32 d1[0], r4 ; CHECK-FP16-NEXT: movmi r6, r3 ; CHECK-FP16-NEXT: cmn r6, #1 -; CHECK-FP16-NEXT: and r2, r2, r6 -; CHECK-FP16-NEXT: movle r0, r5 +; CHECK-FP16-NEXT: movle r0, r12 +; CHECK-FP16-NEXT: cmp r3, #0 +; CHECK-FP16-NEXT: andne r3, r2, r3, asr #31 +; CHECK-FP16-NEXT: and r2, r3, r6 ; CHECK-FP16-NEXT: cmn r2, #1 -; CHECK-FP16-NEXT: mov r1, #-2147483648 -; CHECK-FP16-NEXT: moveq r0, r12 +; CHECK-FP16-NEXT: moveq r0, r1 ; CHECK-FP16-NEXT: cmn r6, #1 +; CHECK-FP16-NEXT: mov r1, #-2147483648 ; CHECK-FP16-NEXT: vmov.32 d0[0], r0 ; CHECK-FP16-NEXT: movgt r1, r7 ; CHECK-FP16-NEXT: cmp r7, #-2147483648 ; CHECK-FP16-NEXT: mov r0, #-2147483648 +; CHECK-FP16-NEXT: vmov.32 d1[1], r9 ; CHECK-FP16-NEXT: movls r7, r0 ; CHECK-FP16-NEXT: cmn r2, #1 -; CHECK-FP16-NEXT: vmov.32 d1[1], r10 ; CHECK-FP16-NEXT: movne r7, r1 ; CHECK-FP16-NEXT: vmov.32 d0[1], r7 ; CHECK-FP16-NEXT: add sp, sp, #16 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index a7d4241..7eb7e14 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -2972,47 +2972,50 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt -; RV32IF-NEXT: lw a1, 20(sp) +; RV32IF-NEXT: lw a0, 20(sp) ; RV32IF-NEXT: lw t0, 8(sp) ; RV32IF-NEXT: lw a4, 12(sp) -; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: lw a1, 16(sp) ; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: addi a5, a3, -1 +; RV32IF-NEXT: addi a6, a3, -1 ; RV32IF-NEXT: mv a2, t0 -; RV32IF-NEXT: beq a4, a5, .LBB45_2 +; RV32IF-NEXT: beq a4, a6, .LBB45_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: sltu a2, a4, a5 +; RV32IF-NEXT: sltu a2, a4, a6 ; RV32IF-NEXT: addi a2, a2, -1 ; RV32IF-NEXT: or a2, a2, t0 ; RV32IF-NEXT: .LBB45_2: # %entry -; RV32IF-NEXT: or a7, a0, a1 -; RV32IF-NEXT: slti a6, a1, 0 +; RV32IF-NEXT: or a7, a1, a0 +; RV32IF-NEXT: slti a5, a0, 0 ; RV32IF-NEXT: bnez a7, .LBB45_16 ; RV32IF-NEXT: # %bb.3: # %entry ; RV32IF-NEXT: mv t0, a4 -; RV32IF-NEXT: bgez a1, .LBB45_17 +; RV32IF-NEXT: bgez a0, .LBB45_17 ; RV32IF-NEXT: .LBB45_4: # %entry -; RV32IF-NEXT: bgeu a4, a5, .LBB45_18 +; RV32IF-NEXT: bgeu a4, a6, .LBB45_18 ; RV32IF-NEXT: .LBB45_5: # %entry ; RV32IF-NEXT: beqz a7, .LBB45_7 ; RV32IF-NEXT: .LBB45_6: # %entry ; RV32IF-NEXT: mv a4, t0 ; RV32IF-NEXT: .LBB45_7: # %entry -; RV32IF-NEXT: neg a5, a6 -; RV32IF-NEXT: and a5, a5, a1 -; RV32IF-NEXT: srai a1, a1, 31 -; RV32IF-NEXT: mv t0, a4 +; RV32IF-NEXT: srai a6, a0, 31 +; RV32IF-NEXT: and a1, a6, a1 +; RV32IF-NEXT: seqz a6, a0 +; RV32IF-NEXT: neg a5, a5 +; RV32IF-NEXT: and a5, a5, a0 +; RV32IF-NEXT: addi a6, a6, -1 +; RV32IF-NEXT: mv a0, a4 ; RV32IF-NEXT: bgez a5, .LBB45_9 ; RV32IF-NEXT: # %bb.8: # %entry -; RV32IF-NEXT: lui t0, 524288 +; RV32IF-NEXT: lui a0, 524288 ; RV32IF-NEXT: .LBB45_9: # %entry -; RV32IF-NEXT: and a0, a1, a0 +; RV32IF-NEXT: and a6, a6, a1 ; RV32IF-NEXT: mv a1, a4 ; RV32IF-NEXT: bltu a3, a4, .LBB45_11 ; RV32IF-NEXT: # %bb.10: # %entry ; RV32IF-NEXT: lui a1, 524288 ; RV32IF-NEXT: .LBB45_11: # %entry -; RV32IF-NEXT: and a6, a0, a5 +; RV32IF-NEXT: and a6, a6, a5 ; RV32IF-NEXT: li a7, -1 ; RV32IF-NEXT: bne a6, a7, .LBB45_19 ; RV32IF-NEXT: # %bb.12: # %entry @@ 
-3029,19 +3032,19 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret ; RV32IF-NEXT: .LBB45_16: # %entry -; RV32IF-NEXT: addi a2, a6, -1 +; RV32IF-NEXT: addi a2, a5, -1 ; RV32IF-NEXT: or a2, a2, t0 ; RV32IF-NEXT: mv t0, a4 -; RV32IF-NEXT: bltz a1, .LBB45_4 +; RV32IF-NEXT: bltz a0, .LBB45_4 ; RV32IF-NEXT: .LBB45_17: # %entry -; RV32IF-NEXT: mv t0, a5 -; RV32IF-NEXT: bltu a4, a5, .LBB45_5 +; RV32IF-NEXT: mv t0, a6 +; RV32IF-NEXT: bltu a4, a6, .LBB45_5 ; RV32IF-NEXT: .LBB45_18: # %entry -; RV32IF-NEXT: mv a4, a5 +; RV32IF-NEXT: mv a4, a6 ; RV32IF-NEXT: bnez a7, .LBB45_6 ; RV32IF-NEXT: j .LBB45_7 ; RV32IF-NEXT: .LBB45_19: # %entry -; RV32IF-NEXT: mv a1, t0 +; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: mv a0, a2 ; RV32IF-NEXT: beq a4, a3, .LBB45_13 ; RV32IF-NEXT: .LBB45_20: # %entry @@ -3108,47 +3111,50 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt -; RV32IFD-NEXT: lw a1, 20(sp) +; RV32IFD-NEXT: lw a0, 20(sp) ; RV32IFD-NEXT: lw t0, 8(sp) ; RV32IFD-NEXT: lw a4, 12(sp) -; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: lw a1, 16(sp) ; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: addi a5, a3, -1 +; RV32IFD-NEXT: addi a6, a3, -1 ; RV32IFD-NEXT: mv a2, t0 -; RV32IFD-NEXT: beq a4, a5, .LBB45_2 +; RV32IFD-NEXT: beq a4, a6, .LBB45_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: sltu a2, a4, a5 +; RV32IFD-NEXT: sltu a2, a4, a6 ; RV32IFD-NEXT: addi a2, a2, -1 ; RV32IFD-NEXT: or a2, a2, t0 ; RV32IFD-NEXT: .LBB45_2: # %entry -; RV32IFD-NEXT: or a7, a0, a1 -; RV32IFD-NEXT: slti a6, a1, 0 +; RV32IFD-NEXT: or a7, a1, a0 +; RV32IFD-NEXT: slti a5, a0, 0 ; RV32IFD-NEXT: bnez a7, .LBB45_16 ; RV32IFD-NEXT: # %bb.3: # %entry ; RV32IFD-NEXT: mv t0, a4 -; RV32IFD-NEXT: bgez a1, .LBB45_17 +; RV32IFD-NEXT: bgez a0, .LBB45_17 ; RV32IFD-NEXT: .LBB45_4: # %entry -; RV32IFD-NEXT: bgeu a4, a5, .LBB45_18 +; RV32IFD-NEXT: bgeu a4, a6, .LBB45_18 ; RV32IFD-NEXT: .LBB45_5: # %entry ; RV32IFD-NEXT: beqz a7, .LBB45_7 ; RV32IFD-NEXT: .LBB45_6: # %entry ; RV32IFD-NEXT: mv a4, t0 ; RV32IFD-NEXT: .LBB45_7: # %entry -; RV32IFD-NEXT: neg a5, a6 -; RV32IFD-NEXT: and a5, a5, a1 -; RV32IFD-NEXT: srai a1, a1, 31 -; RV32IFD-NEXT: mv t0, a4 +; RV32IFD-NEXT: srai a6, a0, 31 +; RV32IFD-NEXT: and a1, a6, a1 +; RV32IFD-NEXT: seqz a6, a0 +; RV32IFD-NEXT: neg a5, a5 +; RV32IFD-NEXT: and a5, a5, a0 +; RV32IFD-NEXT: addi a6, a6, -1 +; RV32IFD-NEXT: mv a0, a4 ; RV32IFD-NEXT: bgez a5, .LBB45_9 ; RV32IFD-NEXT: # %bb.8: # %entry -; RV32IFD-NEXT: lui t0, 524288 +; RV32IFD-NEXT: lui a0, 524288 ; RV32IFD-NEXT: .LBB45_9: # %entry -; RV32IFD-NEXT: and a0, a1, a0 +; RV32IFD-NEXT: and a6, a6, a1 ; RV32IFD-NEXT: mv a1, a4 ; RV32IFD-NEXT: bltu a3, a4, .LBB45_11 ; RV32IFD-NEXT: # %bb.10: # %entry ; RV32IFD-NEXT: lui a1, 524288 ; RV32IFD-NEXT: .LBB45_11: # %entry -; RV32IFD-NEXT: and a6, a0, a5 +; RV32IFD-NEXT: and a6, a6, a5 ; RV32IFD-NEXT: li a7, -1 ; RV32IFD-NEXT: bne a6, a7, .LBB45_19 ; RV32IFD-NEXT: # %bb.12: # %entry @@ -3165,19 +3171,19 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret ; RV32IFD-NEXT: .LBB45_16: # %entry -; RV32IFD-NEXT: addi a2, a6, -1 +; RV32IFD-NEXT: addi a2, a5, -1 ; RV32IFD-NEXT: or a2, a2, t0 ; RV32IFD-NEXT: mv t0, a4 -; RV32IFD-NEXT: bltz a1, .LBB45_4 +; RV32IFD-NEXT: bltz a0, .LBB45_4 ; RV32IFD-NEXT: .LBB45_17: # %entry -; RV32IFD-NEXT: mv t0, a5 -; RV32IFD-NEXT: bltu a4, a5, .LBB45_5 +; RV32IFD-NEXT: mv t0, a6 +; RV32IFD-NEXT: bltu a4, a6, .LBB45_5 ; 
RV32IFD-NEXT: .LBB45_18: # %entry -; RV32IFD-NEXT: mv a4, a5 +; RV32IFD-NEXT: mv a4, a6 ; RV32IFD-NEXT: bnez a7, .LBB45_6 ; RV32IFD-NEXT: j .LBB45_7 ; RV32IFD-NEXT: .LBB45_19: # %entry -; RV32IFD-NEXT: mv a1, t0 +; RV32IFD-NEXT: mv a1, a0 ; RV32IFD-NEXT: mv a0, a2 ; RV32IFD-NEXT: beq a4, a3, .LBB45_13 ; RV32IFD-NEXT: .LBB45_20: # %entry @@ -3240,7 +3246,11 @@ define i64 @utest_f64i64_mm(double %x) { ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixunsdfti@plt -; RV64-NEXT: snez a1, a1 +; RV64-NEXT: snez a2, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -3364,8 +3374,12 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB47_2: # %entry -; RV64-NEXT: slti a1, a1, 1 -; RV64-NEXT: neg a1, a1 +; RV64-NEXT: slti a3, a1, 1 +; RV64-NEXT: neg a3, a3 +; RV64-NEXT: and a0, a3, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: beqz a2, .LBB47_4 ; RV64-NEXT: # %bb.3: # %entry @@ -3462,47 +3476,50 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: lw a0, 20(sp) ; RV32-NEXT: lw t0, 8(sp) ; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a1, 16(sp) ; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: addi a5, a3, -1 +; RV32-NEXT: addi a6, a3, -1 ; RV32-NEXT: mv a2, t0 -; RV32-NEXT: beq a4, a5, .LBB48_2 +; RV32-NEXT: beq a4, a6, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a2, a4, a5 +; RV32-NEXT: sltu a2, a4, a6 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: or a2, a2, t0 ; RV32-NEXT: .LBB48_2: # %entry -; RV32-NEXT: or a7, a0, a1 -; RV32-NEXT: slti a6, a1, 0 +; RV32-NEXT: or a7, a1, a0 +; RV32-NEXT: slti a5, a0, 0 ; RV32-NEXT: bnez a7, .LBB48_16 ; RV32-NEXT: # %bb.3: # %entry ; RV32-NEXT: mv t0, a4 -; RV32-NEXT: bgez a1, .LBB48_17 +; RV32-NEXT: bgez a0, .LBB48_17 ; RV32-NEXT: .LBB48_4: # %entry -; RV32-NEXT: bgeu a4, a5, .LBB48_18 +; RV32-NEXT: bgeu a4, a6, .LBB48_18 ; RV32-NEXT: .LBB48_5: # %entry ; RV32-NEXT: beqz a7, .LBB48_7 ; RV32-NEXT: .LBB48_6: # %entry ; RV32-NEXT: mv a4, t0 ; RV32-NEXT: .LBB48_7: # %entry -; RV32-NEXT: neg a5, a6 -; RV32-NEXT: and a5, a5, a1 -; RV32-NEXT: srai a1, a1, 31 -; RV32-NEXT: mv t0, a4 +; RV32-NEXT: srai a6, a0, 31 +; RV32-NEXT: and a1, a6, a1 +; RV32-NEXT: seqz a6, a0 +; RV32-NEXT: neg a5, a5 +; RV32-NEXT: and a5, a5, a0 +; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: mv a0, a4 ; RV32-NEXT: bgez a5, .LBB48_9 ; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: lui t0, 524288 +; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: .LBB48_9: # %entry -; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: and a6, a6, a1 ; RV32-NEXT: mv a1, a4 ; RV32-NEXT: bltu a3, a4, .LBB48_11 ; RV32-NEXT: # %bb.10: # %entry ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB48_11: # %entry -; RV32-NEXT: and a6, a0, a5 +; RV32-NEXT: and a6, a6, a5 ; RV32-NEXT: li a7, -1 ; RV32-NEXT: bne a6, a7, .LBB48_19 ; RV32-NEXT: # %bb.12: # %entry @@ -3519,19 +3536,19 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB48_16: # %entry -; RV32-NEXT: addi a2, a6, -1 +; RV32-NEXT: addi a2, a5, -1 ; RV32-NEXT: or a2, a2, t0 ; RV32-NEXT: mv t0, a4 -; RV32-NEXT: bltz a1, .LBB48_4 
+; RV32-NEXT: bltz a0, .LBB48_4 ; RV32-NEXT: .LBB48_17: # %entry -; RV32-NEXT: mv t0, a5 -; RV32-NEXT: bltu a4, a5, .LBB48_5 +; RV32-NEXT: mv t0, a6 +; RV32-NEXT: bltu a4, a6, .LBB48_5 ; RV32-NEXT: .LBB48_18: # %entry -; RV32-NEXT: mv a4, a5 +; RV32-NEXT: mv a4, a6 ; RV32-NEXT: bnez a7, .LBB48_6 ; RV32-NEXT: j .LBB48_7 ; RV32-NEXT: .LBB48_19: # %entry -; RV32-NEXT: mv a1, t0 +; RV32-NEXT: mv a1, a0 ; RV32-NEXT: mv a0, a2 ; RV32-NEXT: beq a4, a3, .LBB48_13 ; RV32-NEXT: .LBB48_20: # %entry @@ -3592,7 +3609,11 @@ define i64 @utest_f32i64_mm(float %x) { ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call __fixunssfti@plt -; RV64-NEXT: snez a1, a1 +; RV64-NEXT: snez a2, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -3687,8 +3708,12 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB50_2: # %entry -; RV64-NEXT: slti a1, a1, 1 -; RV64-NEXT: neg a1, a1 +; RV64-NEXT: slti a3, a1, 1 +; RV64-NEXT: neg a3, a3 +; RV64-NEXT: and a0, a3, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: beqz a2, .LBB50_4 ; RV64-NEXT: # %bb.3: # %entry @@ -3718,47 +3743,50 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: lw a0, 20(sp) ; RV32-NEXT: lw t0, 8(sp) ; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a1, 16(sp) ; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: addi a5, a3, -1 +; RV32-NEXT: addi a6, a3, -1 ; RV32-NEXT: mv a2, t0 -; RV32-NEXT: beq a4, a5, .LBB51_2 +; RV32-NEXT: beq a4, a6, .LBB51_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a2, a4, a5 +; RV32-NEXT: sltu a2, a4, a6 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: or a2, a2, t0 ; RV32-NEXT: .LBB51_2: # %entry -; RV32-NEXT: or a7, a0, a1 -; RV32-NEXT: slti a6, a1, 0 +; RV32-NEXT: or a7, a1, a0 +; RV32-NEXT: slti a5, a0, 0 ; RV32-NEXT: bnez a7, .LBB51_16 ; RV32-NEXT: # %bb.3: # %entry ; RV32-NEXT: mv t0, a4 -; RV32-NEXT: bgez a1, .LBB51_17 +; RV32-NEXT: bgez a0, .LBB51_17 ; RV32-NEXT: .LBB51_4: # %entry -; RV32-NEXT: bgeu a4, a5, .LBB51_18 +; RV32-NEXT: bgeu a4, a6, .LBB51_18 ; RV32-NEXT: .LBB51_5: # %entry ; RV32-NEXT: beqz a7, .LBB51_7 ; RV32-NEXT: .LBB51_6: # %entry ; RV32-NEXT: mv a4, t0 ; RV32-NEXT: .LBB51_7: # %entry -; RV32-NEXT: neg a5, a6 -; RV32-NEXT: and a5, a5, a1 -; RV32-NEXT: srai a1, a1, 31 -; RV32-NEXT: mv t0, a4 +; RV32-NEXT: srai a6, a0, 31 +; RV32-NEXT: and a1, a6, a1 +; RV32-NEXT: seqz a6, a0 +; RV32-NEXT: neg a5, a5 +; RV32-NEXT: and a5, a5, a0 +; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: mv a0, a4 ; RV32-NEXT: bgez a5, .LBB51_9 ; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: lui t0, 524288 +; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: .LBB51_9: # %entry -; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: and a6, a6, a1 ; RV32-NEXT: mv a1, a4 ; RV32-NEXT: bltu a3, a4, .LBB51_11 ; RV32-NEXT: # %bb.10: # %entry ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB51_11: # %entry -; RV32-NEXT: and a6, a0, a5 +; RV32-NEXT: and a6, a6, a5 ; RV32-NEXT: li a7, -1 ; RV32-NEXT: bne a6, a7, .LBB51_19 ; RV32-NEXT: # %bb.12: # %entry @@ -3775,19 +3803,19 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: 
.LBB51_16: # %entry -; RV32-NEXT: addi a2, a6, -1 +; RV32-NEXT: addi a2, a5, -1 ; RV32-NEXT: or a2, a2, t0 ; RV32-NEXT: mv t0, a4 -; RV32-NEXT: bltz a1, .LBB51_4 +; RV32-NEXT: bltz a0, .LBB51_4 ; RV32-NEXT: .LBB51_17: # %entry -; RV32-NEXT: mv t0, a5 -; RV32-NEXT: bltu a4, a5, .LBB51_5 +; RV32-NEXT: mv t0, a6 +; RV32-NEXT: bltu a4, a6, .LBB51_5 ; RV32-NEXT: .LBB51_18: # %entry -; RV32-NEXT: mv a4, a5 +; RV32-NEXT: mv a4, a6 ; RV32-NEXT: bnez a7, .LBB51_6 ; RV32-NEXT: j .LBB51_7 ; RV32-NEXT: .LBB51_19: # %entry -; RV32-NEXT: mv a1, t0 +; RV32-NEXT: mv a1, a0 ; RV32-NEXT: mv a0, a2 ; RV32-NEXT: beq a4, a3, .LBB51_13 ; RV32-NEXT: .LBB51_20: # %entry @@ -3894,7 +3922,11 @@ define i64 @utesth_f16i64_mm(half %x) { ; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixunssfti@plt -; RV64-NEXT: snez a1, a1 +; RV64-NEXT: snez a2, a1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -3993,8 +4025,12 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: li a2, 1 ; RV64-NEXT: .LBB53_2: # %entry -; RV64-NEXT: slti a1, a1, 1 -; RV64-NEXT: neg a1, a1 +; RV64-NEXT: slti a3, a1, 1 +; RV64-NEXT: neg a3, a3 +; RV64-NEXT: and a0, a3, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: beqz a2, .LBB53_4 ; RV64-NEXT: # %bb.3: # %entry diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll index de9a54d..77faf67 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -5572,12 +5572,20 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixunsdfti@plt -; CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: snez a2, a1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: seqz a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a0, a1, a0 ; CHECK-NOV-NEXT: snez a1, s1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: addi s1, s1, -1 +; CHECK-NOV-NEXT: seqz a2, s1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -5615,7 +5623,15 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 -; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: addi s1, s1, -1 +; CHECK-V-NEXT: seqz a3, s1 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a2, a3, a2 +; CHECK-V-NEXT: snez a3, a1 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a0, a3, a0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) @@ -5661,35 +5677,42 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixdfti@plt -; CHECK-NOV-NEXT: mv a3, a1 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: blez a1, .LBB47_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: li a3, 1 +; CHECK-NOV-NEXT: li a2, 
1 ; CHECK-NOV-NEXT: .LBB47_2: # %entry ; CHECK-NOV-NEXT: mv a4, s1 ; CHECK-NOV-NEXT: blez s1, .LBB47_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a4, 1 ; CHECK-NOV-NEXT: .LBB47_4: # %entry -; CHECK-NOV-NEXT: slti a1, a1, 1 -; CHECK-NOV-NEXT: neg a1, a1 -; CHECK-NOV-NEXT: slti a2, s1, 1 -; CHECK-NOV-NEXT: neg a2, a2 -; CHECK-NOV-NEXT: and a2, a2, s0 +; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: slti a0, s1, 1 +; CHECK-NOV-NEXT: neg a0, a0 +; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: addi s1, s1, -1 +; CHECK-NOV-NEXT: seqz a5, s1 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a0, a5, a0 ; CHECK-NOV-NEXT: beqz a4, .LBB47_6 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a4, a4 ; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: and a2, a4, a2 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: .LBB47_6: # %entry -; CHECK-NOV-NEXT: and a1, a1, a0 -; CHECK-NOV-NEXT: beqz a3, .LBB47_8 +; CHECK-NOV-NEXT: and a1, a1, a3 +; CHECK-NOV-NEXT: beqz a2, .LBB47_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: sgtz a0, a3 -; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a1, a0, a1 +; CHECK-NOV-NEXT: sgtz a2, a2 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: .LBB47_8: # %entry -; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -5730,29 +5753,37 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB47_2: # %entry ; CHECK-V-NEXT: slti a3, s0, 1 -; CHECK-V-NEXT: neg a4, a3 -; CHECK-V-NEXT: slti a1, a1, 1 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: and a3, a3, s1 +; CHECK-V-NEXT: addi a4, s0, -1 +; CHECK-V-NEXT: seqz a4, a4 +; CHECK-V-NEXT: addi a4, a4, -1 +; CHECK-V-NEXT: slti a5, a1, 1 +; CHECK-V-NEXT: neg a5, a5 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a6, a1 ; CHECK-V-NEXT: blez s0, .LBB47_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li s0, 1 ; CHECK-V-NEXT: .LBB47_4: # %entry -; CHECK-V-NEXT: neg a3, a1 -; CHECK-V-NEXT: and a1, a4, s1 +; CHECK-V-NEXT: and a1, a5, a0 +; CHECK-V-NEXT: addi a5, a6, -1 +; CHECK-V-NEXT: and a0, a4, a3 ; CHECK-V-NEXT: beqz s0, .LBB47_6 ; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: sgtz a4, s0 -; CHECK-V-NEXT: neg a4, a4 -; CHECK-V-NEXT: and a1, a4, a1 -; CHECK-V-NEXT: .LBB47_6: # %entry +; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: and a0, a3, a0 +; CHECK-V-NEXT: .LBB47_6: # %entry +; CHECK-V-NEXT: and a1, a5, a1 ; CHECK-V-NEXT: beqz a2, .LBB47_8 ; CHECK-V-NEXT: # %bb.7: # %entry ; CHECK-V-NEXT: sgtz a2, a2 ; CHECK-V-NEXT: neg a2, a2 -; CHECK-V-NEXT: and a0, a2, a0 +; CHECK-V-NEXT: and a1, a2, a1 ; CHECK-V-NEXT: .LBB47_8: # %entry -; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd a1, 32(sp) +; CHECK-V-NEXT: sd a1, 24(sp) +; CHECK-V-NEXT: sd a0, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) @@ -6031,12 +6062,20 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: snez a2, a1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a0, a2, a0 +; 
CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: seqz a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a0, a1, a0 ; CHECK-NOV-NEXT: snez a1, s1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: addi s1, s1, -1 +; CHECK-NOV-NEXT: seqz a2, s1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -6074,7 +6113,15 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 -; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: addi s1, s1, -1 +; CHECK-V-NEXT: seqz a3, s1 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a2, a3, a2 +; CHECK-V-NEXT: snez a3, a1 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a0, a3, a0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) @@ -6120,35 +6167,42 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixsfti@plt -; CHECK-NOV-NEXT: mv a3, a1 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: blez a1, .LBB50_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: li a3, 1 +; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB50_2: # %entry ; CHECK-NOV-NEXT: mv a4, s1 ; CHECK-NOV-NEXT: blez s1, .LBB50_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a4, 1 ; CHECK-NOV-NEXT: .LBB50_4: # %entry -; CHECK-NOV-NEXT: slti a1, a1, 1 -; CHECK-NOV-NEXT: neg a1, a1 -; CHECK-NOV-NEXT: slti a2, s1, 1 -; CHECK-NOV-NEXT: neg a2, a2 -; CHECK-NOV-NEXT: and a2, a2, s0 +; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: slti a0, s1, 1 +; CHECK-NOV-NEXT: neg a0, a0 +; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: addi s1, s1, -1 +; CHECK-NOV-NEXT: seqz a5, s1 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a0, a5, a0 ; CHECK-NOV-NEXT: beqz a4, .LBB50_6 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a4, a4 ; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: and a2, a4, a2 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: .LBB50_6: # %entry -; CHECK-NOV-NEXT: and a1, a1, a0 -; CHECK-NOV-NEXT: beqz a3, .LBB50_8 +; CHECK-NOV-NEXT: and a1, a1, a3 +; CHECK-NOV-NEXT: beqz a2, .LBB50_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: sgtz a0, a3 -; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a1, a0, a1 +; CHECK-NOV-NEXT: sgtz a2, a2 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: .LBB50_8: # %entry -; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -6189,29 +6243,37 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB50_2: # %entry ; CHECK-V-NEXT: slti a3, s0, 1 -; CHECK-V-NEXT: neg a4, a3 -; CHECK-V-NEXT: slti a1, a1, 1 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: and a3, a3, s1 +; CHECK-V-NEXT: addi a4, s0, -1 +; CHECK-V-NEXT: seqz a4, a4 +; CHECK-V-NEXT: addi a4, a4, -1 +; CHECK-V-NEXT: slti a5, a1, 1 +; CHECK-V-NEXT: neg a5, a5 +; CHECK-V-NEXT: addi 
a1, a1, -1 +; CHECK-V-NEXT: seqz a6, a1 ; CHECK-V-NEXT: blez s0, .LBB50_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li s0, 1 ; CHECK-V-NEXT: .LBB50_4: # %entry -; CHECK-V-NEXT: neg a3, a1 -; CHECK-V-NEXT: and a1, a4, s1 +; CHECK-V-NEXT: and a1, a5, a0 +; CHECK-V-NEXT: addi a5, a6, -1 +; CHECK-V-NEXT: and a0, a4, a3 ; CHECK-V-NEXT: beqz s0, .LBB50_6 ; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: sgtz a4, s0 -; CHECK-V-NEXT: neg a4, a4 -; CHECK-V-NEXT: and a1, a4, a1 -; CHECK-V-NEXT: .LBB50_6: # %entry +; CHECK-V-NEXT: sgtz a3, s0 +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: and a0, a3, a0 +; CHECK-V-NEXT: .LBB50_6: # %entry +; CHECK-V-NEXT: and a1, a5, a1 ; CHECK-V-NEXT: beqz a2, .LBB50_8 ; CHECK-V-NEXT: # %bb.7: # %entry ; CHECK-V-NEXT: sgtz a2, a2 ; CHECK-V-NEXT: neg a2, a2 -; CHECK-V-NEXT: and a0, a2, a0 +; CHECK-V-NEXT: and a1, a2, a1 ; CHECK-V-NEXT: .LBB50_8: # %entry -; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd a1, 32(sp) +; CHECK-V-NEXT: sd a1, 24(sp) +; CHECK-V-NEXT: sd a0, 32(sp) ; CHECK-V-NEXT: addi a0, sp, 24 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) @@ -6485,12 +6547,20 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: snez a2, a1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: seqz a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a0, a1, a0 ; CHECK-NOV-NEXT: snez a1, s2 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a1, a1, s1 +; CHECK-NOV-NEXT: addi s2, s2, -1 +; CHECK-NOV-NEXT: seqz a2, s2 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -6519,12 +6589,20 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: snez a2, a1 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a0, a2, a0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: snez a1, s2 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a1, a1, s1 +; CHECK-V-NEXT: addi s2, s2, -1 +; CHECK-V-NEXT: seqz a2, s2 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a1, a2, a1 ; CHECK-V-NEXT: sd a1, 8(sp) ; CHECK-V-NEXT: sd a0, 0(sp) ; CHECK-V-NEXT: addi a0, sp, 8 @@ -6568,35 +6646,42 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: mv a0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixsfti@plt -; CHECK-NOV-NEXT: mv a3, a1 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: blez a1, .LBB53_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: li a3, 1 +; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB53_2: # %entry ; CHECK-NOV-NEXT: mv a4, s1 ; CHECK-NOV-NEXT: blez s1, .LBB53_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a4, 1 ; CHECK-NOV-NEXT: .LBB53_4: # %entry -; CHECK-NOV-NEXT: slti a1, a1, 1 -; CHECK-NOV-NEXT: neg a1, a1 -; CHECK-NOV-NEXT: slti a2, s1, 1 -; CHECK-NOV-NEXT: neg a2, a2 -; CHECK-NOV-NEXT: and a2, a2, s0 +; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a3, a3, a0 +; 
CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: slti a0, s1, 1 +; CHECK-NOV-NEXT: neg a0, a0 +; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: addi s1, s1, -1 +; CHECK-NOV-NEXT: seqz a5, s1 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a0, a5, a0 ; CHECK-NOV-NEXT: beqz a4, .LBB53_6 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a4, a4 ; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: and a2, a4, a2 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: .LBB53_6: # %entry -; CHECK-NOV-NEXT: and a1, a1, a0 -; CHECK-NOV-NEXT: beqz a3, .LBB53_8 +; CHECK-NOV-NEXT: and a1, a1, a3 +; CHECK-NOV-NEXT: beqz a2, .LBB53_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: sgtz a0, a3 -; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a1, a0, a1 +; CHECK-NOV-NEXT: sgtz a2, a2 +; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: .LBB53_8: # %entry -; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -6634,26 +6719,34 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a4, 1 ; CHECK-V-NEXT: .LBB53_4: # %entry -; CHECK-V-NEXT: slti a1, a1, 1 -; CHECK-V-NEXT: neg a3, a1 -; CHECK-V-NEXT: slti a1, s1, 1 -; CHECK-V-NEXT: neg a1, a1 -; CHECK-V-NEXT: and a1, a1, s0 +; CHECK-V-NEXT: slti a3, a1, 1 +; CHECK-V-NEXT: neg a3, a3 +; CHECK-V-NEXT: and a3, a3, a0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: slti a0, s1, 1 +; CHECK-V-NEXT: neg a0, a0 +; CHECK-V-NEXT: and a0, a0, s0 +; CHECK-V-NEXT: addi s1, s1, -1 +; CHECK-V-NEXT: seqz a5, s1 +; CHECK-V-NEXT: addi a5, a5, -1 +; CHECK-V-NEXT: and a0, a5, a0 ; CHECK-V-NEXT: beqz a4, .LBB53_6 ; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a4, a4 ; CHECK-V-NEXT: neg a4, a4 -; CHECK-V-NEXT: and a1, a4, a1 +; CHECK-V-NEXT: and a0, a4, a0 ; CHECK-V-NEXT: .LBB53_6: # %entry -; CHECK-V-NEXT: and a0, a3, a0 +; CHECK-V-NEXT: and a1, a1, a3 ; CHECK-V-NEXT: beqz a2, .LBB53_8 ; CHECK-V-NEXT: # %bb.7: # %entry ; CHECK-V-NEXT: sgtz a2, a2 ; CHECK-V-NEXT: neg a2, a2 -; CHECK-V-NEXT: and a0, a2, a0 +; CHECK-V-NEXT: and a1, a2, a1 ; CHECK-V-NEXT: .LBB53_8: # %entry -; CHECK-V-NEXT: sd a0, 8(sp) -; CHECK-V-NEXT: sd a1, 0(sp) +; CHECK-V-NEXT: sd a1, 8(sp) +; CHECK-V-NEXT: sd a0, 0(sp) ; CHECK-V-NEXT: addi a0, sp, 8 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v9, (a0) diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index d6376d3..d3297d2 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -1817,94 +1817,100 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mvn r11, #-2147483648 -; CHECK-NEXT: cmp r1, r11 ; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: csel r0, r1, r11, lo +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: csel r1, r3, r0, mi +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: it ne +; CHECK-NEXT: andne.w r0, r2, r0, asr #31 +; CHECK-NEXT: mvn r11, #-2147483648 +; CHECK-NEXT: ands r0, r1 +; CHECK-NEXT: cmp r6, r11 ; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: 
csel r1, r1, r11, mi -; CHECK-NEXT: orrs r3, r2 -; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: csel r4, r0, r1, eq -; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: add.w r3, r0, #1 +; CHECK-NEXT: csel r0, r6, r11, lo ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mov.w r1, #-2147483648 -; CHECK-NEXT: csel r8, r5, r0, mi -; CHECK-NEXT: and.w r0, r2, r5, asr #31 -; CHECK-NEXT: and.w r0, r0, r8 -; CHECK-NEXT: cmp.w r8, #-1 -; CHECK-NEXT: add.w r2, r0, #1 -; CHECK-NEXT: csel r0, r4, r1, gt -; CHECK-NEXT: cmp.w r4, #-2147483648 +; CHECK-NEXT: csel r7, r6, r11, mi +; CHECK-NEXT: orrs r2, r5 ; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: csel r1, r4, r1, hi -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r8, r0, r7, eq +; CHECK-NEXT: mov.w r2, #-2147483648 +; CHECK-NEXT: cmp.w r1, #-1 +; CHECK-NEXT: csel r0, r8, r2, gt +; CHECK-NEXT: cmp.w r8, #-2147483648 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: csel r1, r8, r2, hi +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: csel r0, r1, r0, eq +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmp r1, r11 -; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: mov lr, r0 ; CHECK-NEXT: csel r7, r1, r11, lo ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r6, r1, r11, mi -; CHECK-NEXT: orrs.w r0, r2, r3 -; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: csel r6, r7, r6, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csel r7, r3, r0, mi -; CHECK-NEXT: and.w r2, r2, r3, asr #31 -; CHECK-NEXT: cmp.w r7, #-1 -; CHECK-NEXT: mov.w r0, #-2147483648 -; CHECK-NEXT: csel lr, r6, r0, gt -; CHECK-NEXT: cmp.w r6, #-2147483648 -; CHECK-NEXT: and.w r2, r2, r7 -; CHECK-NEXT: csel r0, r6, r0, hi +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: csel r4, r1, r11, mi +; CHECK-NEXT: orrs r3, r2 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: csel r7, r7, r4, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r4, r0, r9, mi +; CHECK-NEXT: mov.w r3, #-2147483648 +; CHECK-NEXT: cmp.w r4, #-1 +; CHECK-NEXT: csel r9, r7, r3, gt +; CHECK-NEXT: cmp.w r7, #-2147483648 +; CHECK-NEXT: csel r12, r7, r3, hi +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: andne.w r3, r2, r3, asr #31 +; CHECK-NEXT: and.w r2, r3, r4 +; CHECK-NEXT: mov.w r3, #-1 ; CHECK-NEXT: adds r2, #1 ; CHECK-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-NEXT: csel r0, r0, lr, eq -; CHECK-NEXT: mov.w lr, #-1 -; CHECK-NEXT: cmp r9, r11 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: csel r2, r10, lr, lo -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: csel r2, r10, r2, eq +; CHECK-NEXT: csel r12, r12, r9, eq +; CHECK-NEXT: cmp r6, r11 +; CHECK-NEXT: csel r6, r10, r3, lo +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: csel r6, r10, r6, eq ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r5, r10, lr, mi -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r2, r2, r5, eq -; CHECK-NEXT: cmp.w r4, #-2147483648 -; CHECK-NEXT: mov.w r5, #0 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: csel r4, r2, r5, hi -; CHECK-NEXT: csel r4, r2, r4, eq -; CHECK-NEXT: cmp.w r8, #-1 -; CHECK-NEXT: csel r2, r2, r5, gt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r2, r4, r2, eq +; CHECK-NEXT: csel r5, r10, r3, mi +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r5, r6, r5, eq +; CHECK-NEXT: cmp.w r8, #-2147483648 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: ldr r2, [sp, #16] @ 
4-byte Reload +; CHECK-NEXT: csel r6, r5, r8, hi +; CHECK-NEXT: csel r6, r5, r6, eq +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: csel r5, r5, r8, gt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: csel r5, r6, r5, eq ; CHECK-NEXT: cmp r1, r11 -; CHECK-NEXT: csel r1, r12, lr, lo -; CHECK-NEXT: csel r1, r12, r1, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: csel r0, r12, lr, mi -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload +; CHECK-NEXT: csel r1, lr, r3, lo +; CHECK-NEXT: csel r1, lr, r1, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r0, lr, r3, mi +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: cmp.w r6, #-2147483648 -; CHECK-NEXT: csel r1, r0, r5, hi +; CHECK-NEXT: cmp.w r7, #-2147483648 +; CHECK-NEXT: csel r1, r0, r8, hi +; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload ; CHECK-NEXT: csel r1, r0, r1, eq -; CHECK-NEXT: cmp.w r7, #-1 -; CHECK-NEXT: csel r0, r0, r5, gt -; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: cmp.w r4, #-1 +; CHECK-NEXT: csel r0, r0, r8, gt +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r12, r0 ; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -2068,96 +2074,105 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #20 -; CHECK-NEXT: sub sp, #20 -; CHECK-NEXT: vmov r7, r0, d0 +; CHECK-NEXT: .pad #28 +; CHECK-NEXT: sub sp, #28 +; CHECK-NEXT: vmov r4, r0, d0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mvn r9, #-2147483648 -; CHECK-NEXT: cmp r1, r9 ; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: csel r0, r1, r9, lo +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: csel r1, r3, r0, mi +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: it ne +; CHECK-NEXT: andne.w r0, r2, r0, asr #31 +; CHECK-NEXT: mvn r10, #-2147483648 +; CHECK-NEXT: ands r0, r1 +; CHECK-NEXT: cmp r6, r10 ; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: csel r1, r1, r9, mi -; CHECK-NEXT: orrs r3, r2 -; CHECK-NEXT: mov.w r8, #-2147483648 -; CHECK-NEXT: csel r4, r0, r1, eq -; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: add.w r3, r0, #1 +; CHECK-NEXT: csel r0, r6, r10, lo ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: csel r10, r5, r0, mi -; CHECK-NEXT: and.w r0, r2, r5, asr #31 -; CHECK-NEXT: and.w r0, r0, r10 -; CHECK-NEXT: cmp.w r10, #-1 -; CHECK-NEXT: add.w r2, r0, #1 -; CHECK-NEXT: csel r0, r4, r8, gt -; CHECK-NEXT: cmp.w r4, #-2147483648 -; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: csel r1, r4, r8, hi -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r7, r6, r10, mi +; CHECK-NEXT: orrs r2, r5 +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: csel r8, r0, r7, eq +; CHECK-NEXT: mov.w r2, #-2147483648 +; CHECK-NEXT: cmp.w r1, #-1 +; CHECK-NEXT: csel r0, r8, r2, gt +; CHECK-NEXT: cmp.w r8, #-2147483648 +; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: csel r1, r8, r2, hi +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: str 
r0, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: cmp r1, r9 +; CHECK-NEXT: cmp r1, r10 ; CHECK-NEXT: mov lr, r0 -; CHECK-NEXT: csel r12, r1, r9, lo +; CHECK-NEXT: csel r7, r1, r10, lo ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r7, r1, r9, mi -; CHECK-NEXT: orrs.w r0, r2, r3 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: csel r7, r12, r7, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csel r12, r3, r0, mi -; CHECK-NEXT: and.w r2, r2, r3, asr #31 -; CHECK-NEXT: cmp.w r12, #-1 -; CHECK-NEXT: and.w r2, r2, r12 -; CHECK-NEXT: csel r0, r7, r8, gt +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: csel r4, r1, r10, mi +; CHECK-NEXT: orrs.w r3, r2, r0 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: csel r7, r7, r4, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r4, r0, r9, mi +; CHECK-NEXT: mov.w r3, #-2147483648 +; CHECK-NEXT: cmp.w r4, #-1 +; CHECK-NEXT: csel r9, r7, r3, gt ; CHECK-NEXT: cmp.w r7, #-2147483648 -; CHECK-NEXT: csel r8, r7, r8, hi +; CHECK-NEXT: csel r12, r7, r3, hi +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: andne.w r3, r2, r3, asr #31 +; CHECK-NEXT: and.w r2, r3, r4 +; CHECK-NEXT: mov.w r3, #-1 ; CHECK-NEXT: adds r2, #1 -; CHECK-NEXT: csel r8, r8, r0, eq -; CHECK-NEXT: mov.w r0, #-1 -; CHECK-NEXT: cmp r6, r9 -; CHECK-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-NEXT: csel r6, r11, r0, lo -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: csel r12, r12, r9, eq +; CHECK-NEXT: cmp r6, r10 +; CHECK-NEXT: csel r6, r11, r3, lo +; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: csel r6, r11, r6, eq ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r5, r11, r0, mi +; CHECK-NEXT: csel r5, r11, r3, mi ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r5, r6, r5, eq -; CHECK-NEXT: cmp.w r4, #-2147483648 -; CHECK-NEXT: mov.w r6, #0 -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: csel r4, r5, r6, hi -; CHECK-NEXT: csel r4, r5, r4, eq -; CHECK-NEXT: cmp.w r10, #-1 -; CHECK-NEXT: csel r5, r5, r6, gt +; CHECK-NEXT: cmp.w r8, #-2147483648 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: csel r6, r5, r8, hi +; CHECK-NEXT: csel r6, r5, r6, eq +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: csel r5, r5, r8, gt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r4, r4, r5, eq -; CHECK-NEXT: cmp r1, r9 -; CHECK-NEXT: csel r1, lr, r0, lo -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: csel r5, r6, r5, eq +; CHECK-NEXT: cmp r1, r10 +; CHECK-NEXT: csel r1, lr, r3, lo ; CHECK-NEXT: csel r1, lr, r1, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r0, lr, r0, mi +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r0, lr, r3, mi ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r1, r0, eq ; CHECK-NEXT: cmp.w r7, #-2147483648 -; CHECK-NEXT: csel r1, r0, r6, hi -; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload +; CHECK-NEXT: csel r1, r0, r8, hi +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: csel r1, r0, r1, eq -; CHECK-NEXT: cmp.w r12, #-1 -; CHECK-NEXT: csel r0, r0, r6, gt +; CHECK-NEXT: cmp.w r4, #-1 +; CHECK-NEXT: csel r0, r0, r8, gt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 
-; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: vmov q0[3], q0[1], r8, r0 -; CHECK-NEXT: add sp, #20 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: vmov q0[3], q0[1], r12, r0 +; CHECK-NEXT: add sp, #28 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> @@ -2320,91 +2335,100 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mvn r11, #-2147483648 -; CHECK-NEXT: cmp r1, r11 ; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: csel r0, r1, r11, lo +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: csel r1, r3, r0, mi +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: it ne +; CHECK-NEXT: andne.w r0, r2, r0, asr #31 +; CHECK-NEXT: mvn r11, #-2147483648 +; CHECK-NEXT: ands r0, r1 +; CHECK-NEXT: cmp r6, r11 ; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: csel r1, r1, r11, mi -; CHECK-NEXT: orrs r3, r2 -; CHECK-NEXT: mov.w r9, #-2147483648 -; CHECK-NEXT: csel r4, r0, r1, eq -; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: add.w r3, r0, #1 +; CHECK-NEXT: csel r0, r6, r11, lo ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: csel r8, r5, r0, mi -; CHECK-NEXT: and.w r0, r2, r5, asr #31 -; CHECK-NEXT: and.w r0, r0, r8 -; CHECK-NEXT: cmp.w r8, #-1 -; CHECK-NEXT: add.w r2, r0, #1 -; CHECK-NEXT: csel r0, r4, r9, gt -; CHECK-NEXT: cmp.w r4, #-2147483648 +; CHECK-NEXT: csel r7, r6, r11, mi +; CHECK-NEXT: orrs r2, r5 ; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: csel r1, r4, r9, hi -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r8, r0, r7, eq +; CHECK-NEXT: mov.w r2, #-2147483648 +; CHECK-NEXT: cmp.w r1, #-1 +; CHECK-NEXT: csel r0, r8, r2, gt +; CHECK-NEXT: cmp.w r8, #-2147483648 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: csel r1, r8, r2, hi +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: csel r0, r1, r0, eq +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: vmov.u16 r0, q4[0] ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: cmp r1, r11 ; CHECK-NEXT: mov lr, r0 -; CHECK-NEXT: csel r12, r1, r11, lo -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r7, r1, r11, mi -; CHECK-NEXT: orrs.w r0, r2, r3 -; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: csel r7, r12, r7, eq +; CHECK-NEXT: csel r7, r1, r11, lo ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csel r12, r3, r0, mi -; CHECK-NEXT: and.w r2, r2, r3, asr #31 -; CHECK-NEXT: cmp.w r12, #-1 -; CHECK-NEXT: and.w r2, r2, r12 -; CHECK-NEXT: csel r0, r7, r9, gt +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: csel r4, r1, r11, mi +; CHECK-NEXT: orrs r3, r2 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: csel r7, r7, r4, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r4, r0, r9, mi +; CHECK-NEXT: mov.w r3, #-2147483648 +; CHECK-NEXT: cmp.w r4, #-1 +; CHECK-NEXT: csel r9, r7, r3, gt ; CHECK-NEXT: cmp.w r7, #-2147483648 -; CHECK-NEXT: csel r9, r7, r9, hi +; CHECK-NEXT: csel r12, r7, r3, hi +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: andne.w r3, r2, r3, asr #31 +; CHECK-NEXT: and.w r2, r3, r4 +; CHECK-NEXT: mov.w r3, #-1 ; CHECK-NEXT: adds r2, #1 -; CHECK-NEXT: csel r9, r9, r0, eq -; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-NEXT: csel r12, r12, r9, eq ; 
CHECK-NEXT: cmp r6, r11 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: csel r6, r10, r0, lo -; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: csel r6, r10, r3, lo +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: csel r6, r10, r6, eq ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r5, r10, r0, mi +; CHECK-NEXT: csel r5, r10, r3, mi ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r5, r6, r5, eq -; CHECK-NEXT: cmp.w r4, #-2147483648 -; CHECK-NEXT: mov.w r6, #0 -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: csel r4, r5, r6, hi -; CHECK-NEXT: csel r4, r5, r4, eq -; CHECK-NEXT: cmp.w r8, #-1 -; CHECK-NEXT: csel r5, r5, r6, gt +; CHECK-NEXT: cmp.w r8, #-2147483648 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: csel r6, r5, r8, hi +; CHECK-NEXT: csel r6, r5, r6, eq +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: csel r5, r5, r8, gt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r4, r4, r5, eq +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: csel r5, r6, r5, eq ; CHECK-NEXT: cmp r1, r11 -; CHECK-NEXT: csel r1, lr, r0, lo -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: csel r1, lr, r3, lo ; CHECK-NEXT: csel r1, lr, r1, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r0, lr, r0, mi +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r0, lr, r3, mi ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r1, r0, eq ; CHECK-NEXT: cmp.w r7, #-2147483648 -; CHECK-NEXT: csel r1, r0, r6, hi -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: csel r1, r0, r8, hi +; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload ; CHECK-NEXT: csel r1, r0, r1, eq -; CHECK-NEXT: cmp.w r12, #-1 -; CHECK-NEXT: csel r0, r0, r6, gt +; CHECK-NEXT: cmp.w r4, #-1 +; CHECK-NEXT: csel r0, r0, r8, gt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[3], q0[1], r9, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r12, r0 ; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll index e539f00..9a39c62 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -574,11 +574,16 @@ define i64 @utest_f64i64_cse_combine(double %x) #0 { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui double %x to i128 @@ -672,6 +677,7 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -679,6 +685,10 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 { ; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 3 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 @@ -857,6 +867,7 @@ define 
i64 @ustest_f32i64_cse_combine(float %x) #0 { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -864,6 +875,10 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 { ; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 3 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 @@ -1494,11 +1509,16 @@ define i64 @utest_f64i64_mm(double %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui double %x to i128 @@ -1532,6 +1552,7 @@ define i64 @ustest_f64i64_mm(double %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -1539,6 +1560,10 @@ define i64 @ustest_f64i64_mm(double %x) { ; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 3 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 @@ -1602,11 +1627,16 @@ define i64 @utest_f32i64_mm(float %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui float %x to i128 @@ -1640,6 +1670,7 @@ define i64 @ustest_f32i64_mm(float %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -1647,6 +1678,10 @@ define i64 @ustest_f32i64_mm(float %x) { ; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 3 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 @@ -1714,11 +1749,16 @@ define i64 @utesth_f16i64_mm(half %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i128 @@ -1754,6 +1794,7 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -1761,6 +1802,10 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: 
i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 3 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index facd15f..007802d 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -2309,17 +2309,27 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: @@ -2371,6 +2381,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 @@ -2378,6 +2389,10 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 5 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 @@ -2393,6 +2408,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -2400,6 +2416,10 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 4 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 0 @@ -2591,17 +2611,27 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: @@ -2653,6 +2683,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 @@ -2660,6 +2691,10 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: 
i64.lt_s ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 5 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 @@ -2675,6 +2710,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -2682,6 +2718,10 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 4 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 0 @@ -2877,17 +2917,27 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 6 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: @@ -2941,6 +2991,7 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 6 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 @@ -2948,6 +2999,10 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 6 ; CHECK-NEXT: local.get 6 ; CHECK-NEXT: i64.const 0 @@ -2963,6 +3018,7 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 @@ -2970,6 +3026,10 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.select +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i64.const 1 +; CHECK-NEXT: i64.eq +; CHECK-NEXT: i64.select ; CHECK-NEXT: local.tee 5 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll index 87e9a58..ac6d9d3 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat.ll @@ -1081,6 +1081,8 @@ define i64 @utest_f64i64_mm(double %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -1099,9 +1101,11 @@ define i64 @ustest_f64i64_mm(double %x) { ; CHECK-NEXT: callq __fixdfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: movl $1, %esi ; CHECK-NEXT: cmovleq %rdx, %rsi +; CHECK-NEXT: 
cmovgq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: popq %rcx @@ -1143,6 +1147,8 @@ define i64 @utest_f32i64_mm(float %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -1161,9 +1167,11 @@ define i64 @ustest_f32i64_mm(float %x) { ; CHECK-NEXT: callq __fixsfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: movl $1, %esi ; CHECK-NEXT: cmovleq %rdx, %rsi +; CHECK-NEXT: cmovgq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: popq %rcx @@ -1213,6 +1221,8 @@ define i64 @utesth_f16i64_mm(half %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -1231,9 +1241,11 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: callq __fixhfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: movl $1, %esi ; CHECK-NEXT: cmovleq %rdx, %rsi +; CHECK-NEXT: cmovgq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: popq %rcx diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll index 22fbc1c..c351c1b 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll @@ -2762,8 +2762,12 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 ; CHECK-NEXT: cmovneq %rcx, %rbx +; CHECK-NEXT: cmpq $1, %r14 +; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: movq %rbx, %xmm0 ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -2801,15 +2805,20 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: callq __fixdfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: cmovgq %rsi, %rdx +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: cmovleq %rdx, %rdi +; CHECK-NEXT: cmovgq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: cmovgq %rcx, %rbx ; CHECK-NEXT: cmovleq %r14, %rsi +; CHECK-NEXT: cmovgq %rcx, %rbx +; CHECK-NEXT: cmpq $1, %r14 +; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rbx -; CHECK-NEXT: testq %rdx, %rdx +; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: movq %rax, %xmm0 ; CHECK-NEXT: movq %rbx, %xmm1 @@ -2917,8 +2926,12 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 ; CHECK-NEXT: cmovneq %rcx, %rbx +; CHECK-NEXT: cmpq $1, %r14 +; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: movq %rbx, 
%xmm0 ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -2956,15 +2969,20 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: callq __fixsfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: cmovgq %rsi, %rdx +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: cmovleq %rdx, %rdi +; CHECK-NEXT: cmovgq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: cmovgq %rcx, %rbx ; CHECK-NEXT: cmovleq %r14, %rsi +; CHECK-NEXT: cmovgq %rcx, %rbx +; CHECK-NEXT: cmpq $1, %r14 +; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rbx -; CHECK-NEXT: testq %rdx, %rdx +; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: movq %rax, %xmm0 ; CHECK-NEXT: movq %rbx, %xmm1 @@ -3073,8 +3091,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: cmovneq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 ; CHECK-NEXT: cmovneq %rcx, %rbx +; CHECK-NEXT: cmpq $1, %r14 +; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: movq %rbx, %xmm0 ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -3112,15 +3134,20 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: callq __fixhfti@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: cmovgq %rsi, %rdx +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: cmovleq %rdx, %rdi +; CHECK-NEXT: cmovgq %rcx, %rax +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: cmovgq %rcx, %rbx ; CHECK-NEXT: cmovleq %r14, %rsi +; CHECK-NEXT: cmovgq %rcx, %rbx +; CHECK-NEXT: cmpq $1, %r14 +; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: cmovsq %rcx, %rbx -; CHECK-NEXT: testq %rdx, %rdx +; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: movq %rax, %xmm0 ; CHECK-NEXT: movq %rbx, %xmm1 diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll index f0ce231..b738765 100644 --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -370,117 +370,123 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $64, %esp +; X86-NEXT: subl $88, %esp ; X86-NEXT: movl 8(%ebp), %ecx -; X86-NEXT: movl 12(%ebp), %ebx -; X86-NEXT: movl 20(%ebp), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: sarl $31, %eax -; X86-NEXT: movl %eax, %edi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shldl $31, %ebx, %edi -; X86-NEXT: shldl $31, %ecx, %ebx +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $31, %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $31, %ecx, %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shll $31, %ecx ; X86-NEXT: movl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %edx -; X86-NEXT: pushl 16(%ebp) -; X86-NEXT: pushl %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edx +; X86-NEXT: pushl %esi ; X86-NEXT: pushl %ecx -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: pushl %eax -; X86-NEXT: calll __modti3 +; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: testl %esi, %esi -; X86-NEXT: sets %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: testl %edx, %edx -; X86-NEXT: sets %cl -; X86-NEXT: xorb %al, %cl -; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: subl $1, %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sbbl $0, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %eax +; X86-NEXT: sbbl $0, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %ebx +; X86-NEXT: testl %edi, %edi +; X86-NEXT: sets %al +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: sets %dl +; X86-NEXT: xorb %al, %dl +; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: setne %bh -; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi ; X86-NEXT: pushl 20(%ebp) ; X86-NEXT: pushl 16(%ebp) -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edi +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: pushl %eax -; X86-NEXT: calll __divti3 +; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: subl $1, %eax -; X86-NEXT: movl %esi, %edx -; X86-NEXT: sbbl $0, %edx -; X86-NEXT: setb %bl -; X86-NEXT: testb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: cmovel %esi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addb $255, %bl -; X86-NEXT: sbbl $0, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X86-NEXT: testb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
-; X86-NEXT: cmovel %esi, %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %al
+; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovsl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl %esi, %eax
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: andl %ecx, %edi
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovel %ebx, %edi
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: cmpl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF
-; X86-NEXT: cmovbl %edx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: cmovnsl %eax, %esi
+; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: cmovbl %ecx, %edx
; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: movl $2147483647, %ebx # imm = 0x7FFFFFFF
-; X86-NEXT: cmovnsl %ebx, %edx
-; X86-NEXT: movl $-1, %ebx
-; X86-NEXT: cmovnsl %ebx, %eax
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: cmovsl %ecx, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: orl %ecx, %edi
-; X86-NEXT: cmovel %esi, %eax
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovsl %ecx, %esi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: cmovnel %eax, %esi
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: cmpl $-2147483647, %edx # imm = 0x80000001
-; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000
-; X86-NEXT: cmovael %edx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: andl %eax, %esi
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: movl $-2147483648, %edi # imm = 0x80000000
-; X86-NEXT: cmovsl %edi, %edx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovsl %edi, %eax
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: cmovael %edx, %eax
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: andl %ebx, %ecx
-; X86-NEXT: cmpl $-1, %ecx
-; X86-NEXT: cmovel %esi, %eax
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: andl %esi, %ecx
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
+; X86-NEXT: cmovsl %ebx, %edx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: cmovsl %ebx, %esi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %edi
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -836,78 +842,100 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $192, %esp
-; X86-NEXT: movl 24(%ebp), %ebx
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl 28(%ebp), %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: subl $256, %esp # imm = 0x100
+; X86-NEXT: movl 16(%ebp), %edi
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarl $31, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal (%ecx,%ecx), %eax
-; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: shldl $31, %eax, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: leal (%edi,%edi), %eax
+; X86-NEXT: shrl $31, %edi
+; X86-NEXT: shldl $31, %eax, %edi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl $0
; X86-NEXT: pushl %eax
-; X86-NEXT: calll __modti3
+; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl 40(%ebp), %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: leal (%ebx,%ebx), %eax
-; X86-NEXT: shrl $31, %ebx
-; X86-NEXT: shldl $31, %eax, %ebx
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
+; X86-NEXT: pushl 32(%ebp)
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl $0
; X86-NEXT: pushl %eax
; X86-NEXT: calll __modti3
; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl 36(%ebp), %edx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: leal (%ecx,%ecx), %eax
+; X86-NEXT: shrl $31, %ecx
+; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 40(%ebp)
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __modti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: leal (%ecx,%ecx), %eax
+; X86-NEXT: shrl $31, %ecx
+; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %ecx
; X86-NEXT: pushl $0
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl 36(%ebp), %edx
+; X86-NEXT: movl 40(%ebp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %ecx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: leal (%ecx,%ecx), %eax
; X86-NEXT: shrl $31, %ecx
; X86-NEXT: shldl $31, %eax, %ecx
-; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
@@ -921,51 +949,27 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: calll __modti3
; X86-NEXT: addl $32, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 36(%ebp)
+; X86-NEXT: pushl 40(%ebp)
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: pushl $0
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl 32(%ebp), %edx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl 16(%ebp), %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: leal (%ecx,%ecx), %eax
-; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: shldl $31, %eax, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edx
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl 28(%ebp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl 36(%ebp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: pushl $0
; X86-NEXT: pushl %eax
@@ -973,282 +977,327 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: addl $32, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl $0, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: sets %al
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: sets %dl
-; X86-NEXT: xorb %al, %dl
+; X86-NEXT: sets %bl
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: sets %bh
+; X86-NEXT: xorb %bl, %bh
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: setne %al
-; X86-NEXT: testb %dl, %al
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: testb %bh, %al
+; X86-NEXT: cmovel %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: andl %edi, %eax
-; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: cmovsl %ecx, %ebx
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: cmovsl (%esp), %esi # 4-byte Folded Reload
-; X86-NEXT: negl %edi
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: orl (%esp), %edi # 4-byte Folded Reload
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovnel %esi, %edi
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: cmovel %esi, %eax
-; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovel %edi, %ecx
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: cmovsl %esi, %edi
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: cmovsl %esi, %eax
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: andl %ebx, %edx
-; X86-NEXT: cmpl $-1, %edx
-; X86-NEXT: cmovnel %eax, %edx
-; X86-NEXT: cmovel %ecx, %edi
-; X86-NEXT: shrdl $1, %edx, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl 32(%ebp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %edi
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: sets %al
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: sets %dl
-; X86-NEXT: xorb %al, %dl
+; X86-NEXT: sets %bl
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: sets %bh
+; X86-NEXT: xorb %bl, %bh
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: setne %al
-; X86-NEXT: testb %dl, %al
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: cmovel %esi, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: testb %bh, %al
+; X86-NEXT: cmovel %esi, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: andl %ecx, %ebx
-; X86-NEXT: testl %edi, %edi
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmovsl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: cmovsl (%esp), %eax # 4-byte Folded Reload
-; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: orl (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: orl %edx, %edi
-; X86-NEXT: cmovnel %eax, %ecx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovel %edi, %ebx
-; X86-NEXT: cmpl $-1, %ebx
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: cmovsl %edi, %ecx
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovsl %ecx, %ebx
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: andl %edx, %esi
-; X86-NEXT: cmpl $-1, %esi
-; X86-NEXT: cmovnel %ebx, %esi
-; X86-NEXT: cmovel %eax, %edi
-; X86-NEXT: shrdl $1, %esi, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: sbbl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ebx
-; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: testl %edx, %edx
; X86-NEXT: sets %al
-; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: sets %dl
-; X86-NEXT: xorb %al, %dl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: sets %bl
+; X86-NEXT: xorb %al, %bl
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl 28(%ebp)
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __modti3
+; X86-NEXT: addl $32, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
-; X86-NEXT: testb %dl, %al
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl (%esp), %edx # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: andl %ecx, %eax
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovsl %ebx, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: cmovsl %edx, %edi
-; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: orl (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: cmovnel %edi, %edx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovel %edi, %eax
-; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovel %edx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: cmovsl %edi, %edx
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: cmovsl %edi, %eax
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: andl %ebx, %esi
-; X86-NEXT: cmpl $-1, %esi
-; X86-NEXT: cmovnel %eax, %esi
-; X86-NEXT: cmovel %ecx, %edx
-; X86-NEXT: shrdl $1, %esi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: testb %bl, %al
+; X86-NEXT: cmovel %edi, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: sets %al
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %edx
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: sets %bl
-; X86-NEXT: xorb %al, %bl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: sets %bh
+; X86-NEXT: xorb %bl, %bh
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: setne %al
-; X86-NEXT: testb %bl, %al
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: testb %bh, %al
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovsl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovsl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: andl %esi, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovsl %ecx, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: negl %esi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl %eax, %ebx
+; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovnel %esi, %ecx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmovel %edx, %ebx
+; X86-NEXT: cmpl $-1, %ebx
; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: orl (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovnel %edi, %esi
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovsl %edx, %ecx
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovsl %edx, %ebx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $0, %ecx
; X86-NEXT: cmovel %esi, %ecx
-; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: cmovsl %edi, %esi
+; X86-NEXT: cmovnel %ebx, %eax
+; X86-NEXT: shldl $31, %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl %eax, %edi
+; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: cmovel %esi, %edi
+; X86-NEXT: cmpl $-1, %edi
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: cmovsl %esi, %eax
+; X86-NEXT: movl $-1, %ebx
+; X86-NEXT: cmovsl %ebx, %edi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %ecx
+; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: cmovnel %edi, %ecx
+; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovel %esi, %edx
+; X86-NEXT: cmpl $-1, %edx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovsl %esi, %eax
; X86-NEXT: movl $-1, %edi
-; X86-NEXT: cmovsl %edi, %eax
-; X86-NEXT: andl (%esp), %ebx # 4-byte Folded Reload
+; X86-NEXT: cmovsl %edi, %edx
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: cmpl $-1, %ebx
-; X86-NEXT: cmovnel %eax, %ebx
-; X86-NEXT: cmovel %ecx, %esi
-; X86-NEXT: shrdl $1, %ebx, %esi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: cmovnel %edx, %ebx
+; X86-NEXT: shldl $31, %eax, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl %eax, %edi
+; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovel %esi, %edi
+; X86-NEXT: cmpl $-1, %edi
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: cmovsl %esi, %eax
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: cmovsl %esi, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl %edx, %esi
+; X86-NEXT: cmpl $-1, %esi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: cmovnel %edi, %esi
+; X86-NEXT: shldl $31, %eax, %esi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload