From 139392c0a58008b7451e1a1943f5022dc920928b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 23 May 2023 09:19:37 -0700 Subject: [PATCH] [LegalizeTypes][ARM][AArch64][RISCV][VE][WebAssembly] Add special case for smin(X, -1) and smax(X, 0) to ExpandIntRes_MINMAX. We can compute a simpler expression for Lo for these cases. This is an alternative for the test cases in D151180 that works for more targets. This is similar to some of the special cases we have for expanding setcc operands. Differential Revision: https://reviews.llvm.org/D151182 --- .../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 16 + llvm/test/CodeGen/AArch64/fpclamptosat.ll | 9 +- llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 18 +- llvm/test/CodeGen/ARM/fpclamptosat.ll | 916 +++++++-------------- llvm/test/CodeGen/ARM/fpclamptosat_vec.ll | 501 ++++------- llvm/test/CodeGen/RISCV/fpclamptosat.ll | 355 +++----- llvm/test/CodeGen/RISCV/min-max.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 214 ++--- llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll | 333 +++----- llvm/test/CodeGen/VE/Scalar/smax.ll | 10 +- llvm/test/CodeGen/WebAssembly/fpclamptosat.ll | 50 +- llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll | 60 +- 12 files changed, 874 insertions(+), 1628 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 7e85a2e..f2c71be 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2936,6 +2936,22 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, // Hi part is always the same op Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH}); + // The Lo of smin(X, -1) is LHSL if X is negative. Otherwise it's -1. 
+ if (N->getOpcode() == ISD::SMIN && isAllOnesConstant(RHS)) { + SDValue HiNeg = + DAG.getSetCC(DL, CCT, LHSH, DAG.getConstant(0, DL, NVT), ISD::SETLT); + Lo = DAG.getSelect(DL, NVT, HiNeg, LHSL, DAG.getConstant(-1, DL, NVT)); + return; + } + + // The Lo of smax(X, 0) is 0 if X is negative. Otherwise it's LHSL. + if (N->getOpcode() == ISD::SMAX && isNullConstant(RHS)) { + SDValue HiNeg = + DAG.getSetCC(DL, CCT, LHSH, DAG.getConstant(0, DL, NVT), ISD::SETLT); + Lo = DAG.getSelect(DL, NVT, HiNeg, DAG.getConstant(0, DL, NVT), LHSL); + return; + } + // We need to know whether to select Lo part that corresponds to 'winning' // Hi part or if Hi parts are equal. SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC); diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll index 425cdcb..9f10bce 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -906,8 +906,7 @@ define i64 @ustest_f64i64_mm(double %x) { ; CHECK-NEXT: csinc x9, x1, xzr, lt ; CHECK-NEXT: csel x8, xzr, x8, eq ; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: csel x9, x8, xzr, gt -; CHECK-NEXT: csel x0, x8, x9, eq +; CHECK-NEXT: csel x0, xzr, x8, lt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -963,8 +962,7 @@ define i64 @ustest_f32i64_mm(float %x) { ; CHECK-NEXT: csinc x9, x1, xzr, lt ; CHECK-NEXT: csel x8, xzr, x8, eq ; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: csel x9, x8, xzr, gt -; CHECK-NEXT: csel x0, x8, x9, eq +; CHECK-NEXT: csel x0, xzr, x8, lt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -1026,8 +1024,7 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: csinc x9, x1, xzr, lt ; CHECK-NEXT: csel x8, xzr, x8, eq ; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: csel x9, x8, xzr, gt -; CHECK-NEXT: csel x0, x8, x9, eq +; CHECK-NEXT: csel x0, xzr, x8, lt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: diff 
--git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index 9d16f0b..38ec6a0 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -1042,11 +1042,9 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: csinc x11, x20, xzr, lt ; CHECK-NEXT: csel x10, xzr, x10, eq ; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: csel x11, x10, xzr, gt -; CHECK-NEXT: csel x10, x10, x11, eq +; CHECK-NEXT: csel x10, xzr, x10, lt ; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: csel x9, x8, xzr, gt -; CHECK-NEXT: csel x8, x8, x9, eq +; CHECK-NEXT: csel x8, xzr, x8, lt ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: fmov d1, x8 @@ -1149,11 +1147,9 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: csinc x11, x20, xzr, lt ; CHECK-NEXT: csel x10, xzr, x10, eq ; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: csel x11, x10, xzr, gt -; CHECK-NEXT: csel x10, x10, x11, eq +; CHECK-NEXT: csel x10, xzr, x10, lt ; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: csel x9, x8, xzr, gt -; CHECK-NEXT: csel x8, x8, x9, eq +; CHECK-NEXT: csel x8, xzr, x8, lt ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: fmov d1, x8 @@ -1268,11 +1264,9 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: csinc x11, x20, xzr, lt ; CHECK-NEXT: csel x10, xzr, x10, eq ; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: csel x11, x10, xzr, gt -; CHECK-NEXT: csel x10, x10, x11, eq +; CHECK-NEXT: csel x10, xzr, x10, lt ; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: csel x9, x8, xzr, gt -; CHECK-NEXT: csel x8, x8, x9, eq +; CHECK-NEXT: csel x8, xzr, x8, lt ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: fmov d1, x8 diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 91c1a21..66fe5d9 100644 --- 
a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -2260,42 +2260,36 @@ define i32 @ustest_f64i32_mm(double %x) { ; SOFT-NEXT: .save {r7, lr} ; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: bl __aeabi_d2lz -; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: movs r0, #0 ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r3, r0 -; SOFT-NEXT: bpl .LBB29_7 +; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: bpl .LBB29_5 ; SOFT-NEXT: @ %bb.1: @ %entry ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB29_8 +; SOFT-NEXT: bne .LBB29_6 ; SOFT-NEXT: .LBB29_2: @ %entry ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bmi .LBB29_4 +; SOFT-NEXT: bpl .LBB29_7 ; SOFT-NEXT: .LBB29_3: @ %entry -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: .LBB29_4: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r3, r0 -; SOFT-NEXT: ble .LBB29_9 -; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB29_10 -; SOFT-NEXT: .LBB29_6: @ %entry +; SOFT-NEXT: bpl .LBB29_8 +; SOFT-NEXT: .LBB29_4: @ %entry ; SOFT-NEXT: pop {r7, pc} -; SOFT-NEXT: .LBB29_7: @ %entry -; SOFT-NEXT: mvns r3, r2 +; SOFT-NEXT: .LBB29_5: @ %entry +; SOFT-NEXT: mvns r3, r0 ; SOFT-NEXT: cmp r1, #0 ; SOFT-NEXT: beq .LBB29_2 -; SOFT-NEXT: .LBB29_8: @ %entry -; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: .LBB29_6: @ %entry +; SOFT-NEXT: mov r2, r3 ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bpl .LBB29_3 -; SOFT-NEXT: b .LBB29_4 -; SOFT-NEXT: .LBB29_9: @ %entry -; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: bmi .LBB29_3 +; SOFT-NEXT: .LBB29_7: @ %entry +; SOFT-NEXT: mov r1, r0 ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: beq .LBB29_6 -; SOFT-NEXT: .LBB29_10: @ %entry -; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: bmi .LBB29_4 +; SOFT-NEXT: .LBB29_8: @ %entry +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: ustest_f64i32_mm: @@ -2310,14 +2304,11 @@ define i32 @ustest_f64i32_mm(double %x) { ; VFP2-NEXT: movpl.w r2, #-1 ; VFP2-NEXT: it ne ; VFP2-NEXT: movne r0, r2 -; VFP2-NEXT: mov.w r2, #0 ; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r1, r2 
+; VFP2-NEXT: movpl r1, #0 ; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r2, r0 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r0, r2 +; VFP2-NEXT: it mi +; VFP2-NEXT: movmi r0, #0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f64i32_mm: @@ -2442,38 +2433,35 @@ define i32 @ustest_f32i32_mm(float %x) { ; SOFT-NEXT: .save {r4, lr} ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: movs r0, #0 ; SOFT-NEXT: cmp r1, #0 ; SOFT-NEXT: mov r3, r1 ; SOFT-NEXT: bmi .LBB32_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: mov r3, r0 ; SOFT-NEXT: .LBB32_2: @ %entry ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: bmi .LBB32_4 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: bpl .LBB32_6 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mvns r4, r2 -; SOFT-NEXT: .LBB32_4: @ %entry ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: beq .LBB32_6 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB32_6: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: ble .LBB32_9 -; SOFT-NEXT: @ %bb.7: @ %entry +; SOFT-NEXT: bne .LBB32_7 +; SOFT-NEXT: .LBB32_4: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB32_10 -; SOFT-NEXT: .LBB32_8: @ %entry +; SOFT-NEXT: bpl .LBB32_8 +; SOFT-NEXT: .LBB32_5: @ %entry ; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB32_9: @ %entry -; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: .LBB32_6: @ %entry +; SOFT-NEXT: mvns r4, r0 +; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: beq .LBB32_4 +; SOFT-NEXT: .LBB32_7: @ %entry +; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: beq .LBB32_8 -; SOFT-NEXT: .LBB32_10: @ %entry -; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: bmi .LBB32_5 +; SOFT-NEXT: .LBB32_8: @ %entry +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r4, pc} ; ; VFP-LABEL: ustest_f32i32_mm: @@ -2626,38 +2614,35 @@ define i32 @ustest_f16i32_mm(half %x) { ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __aeabi_f2lz -; 
SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: movs r0, #0 ; SOFT-NEXT: cmp r1, #0 ; SOFT-NEXT: mov r3, r1 ; SOFT-NEXT: bmi .LBB35_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: mov r3, r0 ; SOFT-NEXT: .LBB35_2: @ %entry ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: bmi .LBB35_4 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: bpl .LBB35_6 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mvns r4, r2 -; SOFT-NEXT: .LBB35_4: @ %entry ; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: beq .LBB35_6 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB35_6: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: ble .LBB35_9 -; SOFT-NEXT: @ %bb.7: @ %entry +; SOFT-NEXT: bne .LBB35_7 +; SOFT-NEXT: .LBB35_4: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB35_10 -; SOFT-NEXT: .LBB35_8: @ %entry +; SOFT-NEXT: bpl .LBB35_8 +; SOFT-NEXT: .LBB35_5: @ %entry ; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB35_9: @ %entry -; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: .LBB35_6: @ %entry +; SOFT-NEXT: mvns r4, r0 +; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: beq .LBB35_4 +; SOFT-NEXT: .LBB35_7: @ %entry +; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: beq .LBB35_8 -; SOFT-NEXT: .LBB35_10: @ %entry -; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: bmi .LBB35_5 +; SOFT-NEXT: .LBB35_8: @ %entry +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r4, pc} ; ; VFP2-LABEL: ustest_f16i32_mm: @@ -3457,224 +3442,135 @@ define i64 @ustest_f64i64_mm(double %x) { ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: eors r4, r5 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: subs r7, r2, #1 -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: sbcs r7, r6 -; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: movs r7, #1 +; SOFT-NEXT: mov r6, r2 +; SOFT-NEXT: eors r6, r7 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: sbcs r2, r5 ; SOFT-NEXT: blt .LBB47_2 
; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: .LBB47_2: @ %entry -; SOFT-NEXT: orrs r4, r3 +; SOFT-NEXT: orrs r6, r3 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB47_25 +; SOFT-NEXT: beq .LBB47_12 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB47_26 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: beq .LBB47_13 ; SOFT-NEXT: .LBB47_4: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB47_27 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bmi .LBB47_6 ; SOFT-NEXT: .LBB47_5: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB47_7 +; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: .LBB47_6: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: .LBB47_7: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: bne .LBB47_9 -; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: .LBB47_9: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: beq .LBB47_11 -; SOFT-NEXT: @ %bb.10: @ %entry -; SOFT-NEXT: mov r4, r7 -; SOFT-NEXT: .LBB47_11: @ %entry -; SOFT-NEXT: cmp r2, #1 -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: bhs .LBB47_28 -; SOFT-NEXT: @ %bb.12: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB47_29 -; SOFT-NEXT: .LBB47_13: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB47_30 -; SOFT-NEXT: .LBB47_14: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB47_16 -; SOFT-NEXT: .LBB47_15: @ %entry -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: .LBB47_16: @ %entry -; SOFT-NEXT: rsbs r2, r7, #0 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: sbcs r2, r3 -; SOFT-NEXT: bge .LBB47_31 -; SOFT-NEXT: @ %bb.17: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB47_32 -; SOFT-NEXT: .LBB47_18: @ %entry -; SOFT-NEXT: orrs r7, r3 -; SOFT-NEXT: beq .LBB47_20 -; SOFT-NEXT: .LBB47_19: @ %entry -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: .LBB47_20: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: mov r0, r1 -; SOFT-NEXT: bne .LBB47_22 -; SOFT-NEXT: @ %bb.21: @ %entry -; SOFT-NEXT: mov r0, r5 -; 
SOFT-NEXT: .LBB47_22: @ %entry +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: bpl .LBB47_14 +; SOFT-NEXT: @ %bb.7: @ %entry ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB47_24 -; SOFT-NEXT: @ %bb.23: @ %entry -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: .LBB47_24: @ %entry +; SOFT-NEXT: beq .LBB47_15 +; SOFT-NEXT: .LBB47_8: @ %entry +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: beq .LBB47_16 +; SOFT-NEXT: .LBB47_9: @ %entry +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bmi .LBB47_11 +; SOFT-NEXT: .LBB47_10: @ %entry +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: .LBB47_11: @ %entry ; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB47_25: @ %entry +; SOFT-NEXT: .LBB47_12: @ %entry ; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: cmp r6, #0 ; SOFT-NEXT: bne .LBB47_4 -; SOFT-NEXT: .LBB47_26: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: .LBB47_13: @ %entry +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bpl .LBB47_5 +; SOFT-NEXT: b .LBB47_6 +; SOFT-NEXT: .LBB47_14: @ %entry +; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB47_5 -; SOFT-NEXT: .LBB47_27: @ %entry +; SOFT-NEXT: bne .LBB47_8 +; SOFT-NEXT: .LBB47_15: @ %entry ; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB47_6 -; SOFT-NEXT: b .LBB47_7 -; SOFT-NEXT: .LBB47_28: @ %entry -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB47_13 -; SOFT-NEXT: .LBB47_29: @ %entry -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: beq .LBB47_14 -; SOFT-NEXT: .LBB47_30: @ %entry -; SOFT-NEXT: mov r7, r2 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: bne .LBB47_9 +; SOFT-NEXT: .LBB47_16: @ %entry +; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB47_15 -; SOFT-NEXT: b .LBB47_16 -; SOFT-NEXT: .LBB47_31: @ %entry -; SOFT-NEXT: mov r5, r6 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB47_18 -; SOFT-NEXT: .LBB47_32: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: orrs r7, 
r3 -; SOFT-NEXT: bne .LBB47_19 -; SOFT-NEXT: b .LBB47_20 +; SOFT-NEXT: bpl .LBB47_10 +; SOFT-NEXT: b .LBB47_11 ; ; VFP2-LABEL: ustest_f64i64_mm: ; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: subs r4, r2, #1 ; VFP2-NEXT: eor r12, r2, #1 -; VFP2-NEXT: sbcs r4, r3, #0 -; VFP2-NEXT: orr.w lr, r12, r3 -; VFP2-NEXT: mov.w r4, #0 -; VFP2-NEXT: mov.w r5, #1 +; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2-NEXT: orr.w r12, r12, r3 +; VFP2-NEXT: mov.w r2, #0 +; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r4, #1 -; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r4 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, lr -; VFP2-NEXT: cmp r4, #0 +; VFP2-NEXT: movlt r2, #1 +; VFP2-NEXT: cmp r2, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, r4 -; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: mov lr, r1 -; VFP2-NEXT: mov.w r4, #1 -; VFP2-NEXT: ite ne -; VFP2-NEXT: movne lr, r0 -; VFP2-NEXT: moveq lr, r0 -; VFP2-NEXT: cmp r2, #1 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r5, r2 +; VFP2-NEXT: moveq r0, r12 ; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r4, r2 -; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: movmi lr, r3 +; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: it mi +; VFP2-NEXT: movmi r0, #0 +; VFP2-NEXT: cmp r2, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r4, r5 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r3, r12 -; VFP2-NEXT: rsbs r2, r4, #0 -; VFP2-NEXT: sbcs.w r2, r12, r3 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 +; VFP2-NEXT: moveq r1, r2 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: orrs.w r2, r4, r3 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, lr -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it ne 
-; VFP2-NEXT: movne r12, r1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r12 -; VFP2-NEXT: pop {r4, r5, r7, pc} +; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: it mi +; VFP2-NEXT: movmi r1, #0 +; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f64i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, lr} -; FULL-NEXT: push {r4, lr} +; FULL-NEXT: .save {r7, lr} +; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs.w lr, r2, #1 ; FULL-NEXT: eor r12, r2, #1 -; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: subs r2, #1 +; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: orr.w r12, r12, r3 -; FULL-NEXT: cset lr, lt -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r0, r0, lr, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r4, r0, r12, ne -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r1, r1, lr, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: mov.w lr, #1 -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r0, r4, r1, ne -; FULL-NEXT: csel r12, r4, r0, eq -; FULL-NEXT: cmp r2, #1 -; FULL-NEXT: csel r0, r2, lr, lo -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r2, r2, lr, mi -; FULL-NEXT: mov.w lr, #0 -; FULL-NEXT: csel r0, r0, r2, eq -; FULL-NEXT: csel r3, r3, lr, mi -; FULL-NEXT: rsbs r2, r0, #0 -; FULL-NEXT: sbcs.w r2, lr, r3 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r4, r4, r2, ne -; FULL-NEXT: orrs r3, r0 -; FULL-NEXT: csel r0, r12, r4, eq +; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: cmp.w r12, #0 +; FULL-NEXT: csel r0, r0, r12, ne +; FULL-NEXT: cmp r3, #0 +; FULL-NEXT: it pl +; FULL-NEXT: movpl r3, #0 +; FULL-NEXT: cmp r3, #0 +; FULL-NEXT: it mi +; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r2, r1, r2, ne +; FULL-NEXT: csel r1, r1, r2, ne +; FULL-NEXT: cmp.w r12, #0 +; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r1, r1, r2, eq -; FULL-NEXT: pop {r4, pc} +; FULL-NEXT: it mi +; FULL-NEXT: movmi r1, #0 +; FULL-NEXT: 
pop {r7, pc} entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616) @@ -4035,226 +3931,132 @@ entry: define i64 @ustest_f32i64_mm(float %x) { ; SOFT-LABEL: ustest_f32i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, r7, lr} -; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: .save {r4, r5, r6, lr} +; SOFT-NEXT: push {r4, r5, r6, lr} ; SOFT-NEXT: bl __fixsfti +; SOFT-NEXT: mov r4, r1 ; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: subs r4, r2, #1 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r6 -; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: subs r6, r2, #1 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: sbcs r6, r1 +; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: blt .LBB50_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: mov r6, r1 ; SOFT-NEXT: .LBB50_2: @ %entry -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: cmp r6, #0 ; SOFT-NEXT: bne .LBB50_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: .LBB50_4: @ %entry -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: eors r7, r5 -; SOFT-NEXT: orrs r7, r3 -; SOFT-NEXT: beq .LBB50_26 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB50_27 +; SOFT-NEXT: eors r2, r5 +; SOFT-NEXT: orrs r2, r3 +; SOFT-NEXT: bne .LBB50_6 +; SOFT-NEXT: @ %bb.5: @ %entry +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: .LBB50_6: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB50_8 -; SOFT-NEXT: .LBB50_7: @ %entry -; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bmi .LBB50_8 +; SOFT-NEXT: @ %bb.7: @ %entry +; SOFT-NEXT: mov r3, r1 ; SOFT-NEXT: .LBB50_8: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: bne .LBB50_10 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: bpl .LBB50_13 ; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: 
beq .LBB50_14 ; SOFT-NEXT: .LBB50_10: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: beq .LBB50_12 -; SOFT-NEXT: @ %bb.11: @ %entry -; SOFT-NEXT: mov r4, r7 -; SOFT-NEXT: .LBB50_12: @ %entry -; SOFT-NEXT: cmp r2, #1 -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: bhs .LBB50_28 -; SOFT-NEXT: @ %bb.13: @ %entry +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: beq .LBB50_15 +; SOFT-NEXT: .LBB50_11: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB50_29 +; SOFT-NEXT: bpl .LBB50_16 +; SOFT-NEXT: .LBB50_12: @ %entry +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: pop {r4, r5, r6, pc} +; SOFT-NEXT: .LBB50_13: @ %entry +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: bne .LBB50_10 ; SOFT-NEXT: .LBB50_14: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB50_30 +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: bne .LBB50_11 ; SOFT-NEXT: .LBB50_15: @ %entry +; SOFT-NEXT: mov r4, r2 ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB50_17 +; SOFT-NEXT: bmi .LBB50_12 ; SOFT-NEXT: .LBB50_16: @ %entry -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: .LBB50_17: @ %entry -; SOFT-NEXT: rsbs r2, r7, #0 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: sbcs r2, r3 -; SOFT-NEXT: bge .LBB50_31 -; SOFT-NEXT: @ %bb.18: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB50_32 -; SOFT-NEXT: .LBB50_19: @ %entry -; SOFT-NEXT: orrs r7, r3 -; SOFT-NEXT: beq .LBB50_21 -; SOFT-NEXT: .LBB50_20: @ %entry -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: .LBB50_21: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: mov r0, r1 -; SOFT-NEXT: bne .LBB50_23 -; SOFT-NEXT: @ %bb.22: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: .LBB50_23: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB50_25 -; SOFT-NEXT: @ %bb.24: @ %entry -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: .LBB50_25: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB50_26: @ %entry -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB50_6 -; SOFT-NEXT: 
.LBB50_27: @ %entry ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB50_7 -; SOFT-NEXT: b .LBB50_8 -; SOFT-NEXT: .LBB50_28: @ %entry -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB50_14 -; SOFT-NEXT: .LBB50_29: @ %entry -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: beq .LBB50_15 -; SOFT-NEXT: .LBB50_30: @ %entry -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB50_16 -; SOFT-NEXT: b .LBB50_17 -; SOFT-NEXT: .LBB50_31: @ %entry -; SOFT-NEXT: mov r5, r6 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB50_19 -; SOFT-NEXT: .LBB50_32: @ %entry ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: orrs r7, r3 -; SOFT-NEXT: bne .LBB50_20 -; SOFT-NEXT: b .LBB50_21 +; SOFT-NEXT: pop {r4, r5, r6, pc} ; ; VFP2-LABEL: ustest_f32i64_mm: ; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs r4, r2, #1 ; VFP2-NEXT: eor r12, r2, #1 -; VFP2-NEXT: sbcs r4, r3, #0 -; VFP2-NEXT: orr.w lr, r12, r3 -; VFP2-NEXT: mov.w r4, #0 -; VFP2-NEXT: mov.w r5, #1 +; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2-NEXT: orr.w r12, r12, r3 +; VFP2-NEXT: mov.w r2, #0 +; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r4, #1 -; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r4 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, lr -; VFP2-NEXT: cmp r4, #0 +; VFP2-NEXT: movlt r2, #1 +; VFP2-NEXT: cmp r2, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, r4 -; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: mov lr, r1 -; VFP2-NEXT: mov.w r4, #1 -; VFP2-NEXT: ite ne -; VFP2-NEXT: movne lr, r0 -; VFP2-NEXT: moveq lr, r0 -; VFP2-NEXT: cmp r2, #1 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r5, r2 +; VFP2-NEXT: moveq r0, r12 ; 
VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r4, r2 -; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: movmi lr, r3 +; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: it mi +; VFP2-NEXT: movmi r0, #0 +; VFP2-NEXT: cmp r2, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r4, r5 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r3, r12 -; VFP2-NEXT: rsbs r2, r4, #0 -; VFP2-NEXT: sbcs.w r2, r12, r3 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 +; VFP2-NEXT: moveq r1, r2 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: orrs.w r2, r4, r3 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, lr -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r12, r1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r12 -; VFP2-NEXT: pop {r4, r5, r7, pc} +; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: it mi +; VFP2-NEXT: movmi r1, #0 +; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f32i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, lr} -; FULL-NEXT: push {r4, lr} +; FULL-NEXT: .save {r7, lr} +; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs.w lr, r2, #1 ; FULL-NEXT: eor r12, r2, #1 -; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: subs r2, #1 +; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: orr.w r12, r12, r3 -; FULL-NEXT: cset lr, lt -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r0, r0, lr, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r4, r0, r12, ne -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r1, r1, lr, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: mov.w lr, #1 -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r0, r4, r1, ne -; FULL-NEXT: csel r12, r4, r0, eq -; FULL-NEXT: cmp r2, #1 -; FULL-NEXT: csel r0, r2, lr, lo -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r2, r2, lr, mi -; FULL-NEXT: mov.w lr, #0 -; FULL-NEXT: csel r0, r0, r2, eq -; FULL-NEXT: csel r3, r3, lr, mi -; FULL-NEXT: rsbs r2, r0, #0 -; FULL-NEXT: sbcs.w r2, lr, r3 ; FULL-NEXT: cset r2, lt ; 
FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r4, r4, r2, ne -; FULL-NEXT: orrs r3, r0 -; FULL-NEXT: csel r0, r12, r4, eq +; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: cmp.w r12, #0 +; FULL-NEXT: csel r0, r0, r12, ne +; FULL-NEXT: cmp r3, #0 +; FULL-NEXT: it pl +; FULL-NEXT: movpl r3, #0 +; FULL-NEXT: cmp r3, #0 +; FULL-NEXT: it mi +; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r2, r1, r2, ne +; FULL-NEXT: csel r1, r1, r2, ne +; FULL-NEXT: cmp.w r12, #0 +; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r1, r1, r2, eq -; FULL-NEXT: pop {r4, pc} +; FULL-NEXT: it mi +; FULL-NEXT: movmi r1, #0 +; FULL-NEXT: pop {r7, pc} entry: %conv = fptosi float %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616) @@ -4629,233 +4431,139 @@ entry: define i64 @ustest_f16i64_mm(half %x) { ; SOFT-LABEL: ustest_f16i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, r7, lr} -; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: .save {r4, r5, r6, lr} +; SOFT-NEXT: push {r4, r5, r6, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixsfti +; SOFT-NEXT: mov r4, r1 ; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: subs r4, r2, #1 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r6 -; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: subs r6, r2, #1 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: sbcs r6, r1 +; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: blt .LBB53_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: mov r6, r1 ; SOFT-NEXT: .LBB53_2: @ %entry -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: cmp r6, #0 ; SOFT-NEXT: bne .LBB53_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: .LBB53_4: @ %entry -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: eors r7, r5 -; SOFT-NEXT: orrs r7, r3 -; SOFT-NEXT: beq .LBB53_26 +; SOFT-NEXT: eors r2, r5 +; SOFT-NEXT: orrs 
r2, r3 +; SOFT-NEXT: bne .LBB53_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB53_27 +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: .LBB53_6: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB53_8 -; SOFT-NEXT: .LBB53_7: @ %entry -; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bmi .LBB53_8 +; SOFT-NEXT: @ %bb.7: @ %entry +; SOFT-NEXT: mov r3, r1 ; SOFT-NEXT: .LBB53_8: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: bne .LBB53_10 +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: bpl .LBB53_13 ; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: beq .LBB53_14 ; SOFT-NEXT: .LBB53_10: @ %entry -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: beq .LBB53_12 -; SOFT-NEXT: @ %bb.11: @ %entry -; SOFT-NEXT: mov r4, r7 -; SOFT-NEXT: .LBB53_12: @ %entry -; SOFT-NEXT: cmp r2, #1 -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: bhs .LBB53_28 -; SOFT-NEXT: @ %bb.13: @ %entry +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: beq .LBB53_15 +; SOFT-NEXT: .LBB53_11: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB53_29 +; SOFT-NEXT: bpl .LBB53_16 +; SOFT-NEXT: .LBB53_12: @ %entry +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: pop {r4, r5, r6, pc} +; SOFT-NEXT: .LBB53_13: @ %entry +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: bne .LBB53_10 ; SOFT-NEXT: .LBB53_14: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB53_30 +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: bne .LBB53_11 ; SOFT-NEXT: .LBB53_15: @ %entry +; SOFT-NEXT: mov r4, r2 ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB53_17 +; SOFT-NEXT: bmi .LBB53_12 ; SOFT-NEXT: .LBB53_16: @ %entry -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: .LBB53_17: @ %entry -; SOFT-NEXT: rsbs r2, r7, #0 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: sbcs r2, r3 -; SOFT-NEXT: bge .LBB53_31 -; SOFT-NEXT: @ %bb.18: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB53_32 -; SOFT-NEXT: .LBB53_19: @ 
%entry -; SOFT-NEXT: orrs r7, r3 -; SOFT-NEXT: beq .LBB53_21 -; SOFT-NEXT: .LBB53_20: @ %entry -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: .LBB53_21: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: mov r0, r1 -; SOFT-NEXT: bne .LBB53_23 -; SOFT-NEXT: @ %bb.22: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: .LBB53_23: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB53_25 -; SOFT-NEXT: @ %bb.24: @ %entry -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: .LBB53_25: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB53_26: @ %entry -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB53_6 -; SOFT-NEXT: .LBB53_27: @ %entry ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB53_7 -; SOFT-NEXT: b .LBB53_8 -; SOFT-NEXT: .LBB53_28: @ %entry -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB53_14 -; SOFT-NEXT: .LBB53_29: @ %entry -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: beq .LBB53_15 -; SOFT-NEXT: .LBB53_30: @ %entry -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB53_16 -; SOFT-NEXT: b .LBB53_17 -; SOFT-NEXT: .LBB53_31: @ %entry -; SOFT-NEXT: mov r5, r6 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB53_19 -; SOFT-NEXT: .LBB53_32: @ %entry ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: orrs r7, r3 -; SOFT-NEXT: bne .LBB53_20 -; SOFT-NEXT: b .LBB53_21 +; SOFT-NEXT: pop {r4, r5, r6, pc} ; ; VFP2-LABEL: ustest_f16i64_mm: ; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs r4, r2, #1 ; VFP2-NEXT: eor r12, r2, #1 -; VFP2-NEXT: sbcs r4, r3, #0 -; VFP2-NEXT: orr.w lr, r12, r3 -; VFP2-NEXT: mov.w r4, #0 -; VFP2-NEXT: mov.w r5, #1 +; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2-NEXT: orr.w 
r12, r12, r3 +; VFP2-NEXT: mov.w r2, #0 +; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r4, #1 -; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r4 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, lr -; VFP2-NEXT: cmp r4, #0 +; VFP2-NEXT: movlt r2, #1 +; VFP2-NEXT: cmp r2, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, r4 -; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: mov lr, r1 -; VFP2-NEXT: mov.w r4, #1 -; VFP2-NEXT: ite ne -; VFP2-NEXT: movne lr, r0 -; VFP2-NEXT: moveq lr, r0 -; VFP2-NEXT: cmp r2, #1 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r5, r2 +; VFP2-NEXT: moveq r0, r12 ; VFP2-NEXT: cmp r3, #0 ; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r4, r2 -; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: movmi lr, r3 +; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: it mi +; VFP2-NEXT: movmi r0, #0 +; VFP2-NEXT: cmp r2, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r4, r5 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r3, r12 -; VFP2-NEXT: rsbs r2, r4, #0 -; VFP2-NEXT: sbcs.w r2, r12, r3 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 +; VFP2-NEXT: moveq r1, r2 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: orrs.w r2, r4, r3 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, lr -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r12, r1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r1, r12 -; VFP2-NEXT: pop {r4, r5, r7, pc} +; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: cmp.w lr, #0 +; VFP2-NEXT: it mi +; VFP2-NEXT: movmi r1, #0 +; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f16i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, lr} -; FULL-NEXT: push {r4, lr} +; FULL-NEXT: .save {r7, lr} +; FULL-NEXT: push {r7, lr} ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs.w lr, r2, #1 ; FULL-NEXT: eor r12, r2, 
#1 -; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: subs r2, #1 +; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: orr.w r12, r12, r3 -; FULL-NEXT: cset lr, lt -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r0, r0, lr, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r4, r0, r12, ne -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r1, r1, lr, ne -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: mov.w lr, #1 -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r0, r4, r1, ne -; FULL-NEXT: csel r12, r4, r0, eq -; FULL-NEXT: cmp r2, #1 -; FULL-NEXT: csel r0, r2, lr, lo -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r2, r2, lr, mi -; FULL-NEXT: mov.w lr, #0 -; FULL-NEXT: csel r0, r0, r2, eq -; FULL-NEXT: csel r3, r3, lr, mi -; FULL-NEXT: rsbs r2, r0, #0 -; FULL-NEXT: sbcs.w r2, lr, r3 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r4, r4, r2, ne -; FULL-NEXT: orrs r3, r0 -; FULL-NEXT: csel r0, r12, r4, eq +; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: cmp.w r12, #0 +; FULL-NEXT: csel r0, r0, r12, ne +; FULL-NEXT: cmp r3, #0 +; FULL-NEXT: it pl +; FULL-NEXT: movpl r3, #0 +; FULL-NEXT: cmp r3, #0 +; FULL-NEXT: it mi +; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r2, r1, r2, ne +; FULL-NEXT: csel r1, r1, r2, ne +; FULL-NEXT: cmp.w r12, #0 +; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: csel r1, r1, r2, eq -; FULL-NEXT: pop {r4, pc} +; FULL-NEXT: it mi +; FULL-NEXT: movmi r1, #0 +; FULL-NEXT: pop {r7, pc} entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index a4d470b..9d5d5d1 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -3831,99 +3831,65 @@ entry: define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: 
.save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 +; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r7, r2, #1 -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: eor r0, r2, #1 -; CHECK-NEXT: sbcs r7, r3, #0 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: orr r0, r0, r3 -; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r10, r5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r10, r0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r5, r0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, #1 -; CHECK-NEXT: movne r0, r10 -; CHECK-NEXT: mov r8, #1 -; CHECK-NEXT: moveq r0, r10 -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: movlo r1, r2 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movpl r2, r8 -; CHECK-NEXT: mov r11, #0 -; CHECK-NEXT: moveq r2, r1 -; CHECK-NEXT: movpl r3, r11 -; CHECK-NEXT: rsbs r1, r2, #0 -; CHECK-NEXT: vorr d0, d9, d9 -; CHECK-NEXT: rscs r1, r3, #0 +; CHECK-NEXT: orr r10, r0, r3 +; CHECK-NEXT: subs r0, r2, #1 +; CHECK-NEXT: sbcs r0, r3, #0 ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r10, r7 -; CHECK-NEXT: orrs r9, r2, r3 -; CHECK-NEXT: moveq r10, r0 +; CHECK-NEXT: vorr d0, d8, d8 +; CHECK-NEXT: moveq r4, r7 +; CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: moveq r4, r10 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movpl r5, r9 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movwmi r4, #0 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: eor r4, r2, #1 -; CHECK-NEXT: orr r6, r4, r3 -; CHECK-NEXT: subs r4, r2, #1 -; 
CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: moveq r0, r4 +; CHECK-NEXT: eor r6, r2, #1 +; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: orr r6, r6, r3 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: moveq r0, r2 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: moveq r0, r6 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movne r4, r1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r6, #1 -; CHECK-NEXT: movne r1, r0 -; CHECK-NEXT: moveq r1, r0 -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: movlo r6, r2 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: movne r7, r8 +; CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: moveq r7, r10 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movwmi r7, #0 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movmi r8, r2 -; CHECK-NEXT: movpl r3, r11 -; CHECK-NEXT: moveq r8, r6 -; CHECK-NEXT: rsbs r2, r8, #0 -; CHECK-NEXT: rscs r2, r3, #0 -; CHECK-NEXT: movwlt r11, #1 -; CHECK-NEXT: cmp r11, #0 -; CHECK-NEXT: moveq r0, r11 -; CHECK-NEXT: orrs r2, r8, r3 -; CHECK-NEXT: moveq r0, r1 -; CHECK-NEXT: cmp r11, #0 -; CHECK-NEXT: movne r11, r4 +; CHECK-NEXT: movmi r9, r3 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: movwmi r0, #0 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov.32 d1[0], r0 -; CHECK-NEXT: moveq r11, r4 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vmov.32 d0[0], r10 -; CHECK-NEXT: movne r7, r5 +; CHECK-NEXT: vmov.32 d1[0], r7 +; CHECK-NEXT: movne r2, r1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: vmov.32 d0[0], r0 +; CHECK-NEXT: moveq r2, r6 ; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: vmov.32 d1[1], r11 -; CHECK-NEXT: moveq r7, r5 -; CHECK-NEXT: vmov.32 d0[1], r7 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: movwmi r2, #0 +; CHECK-NEXT: vmov.32 d0[1], r2 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; 
CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -4107,99 +4073,65 @@ entry: define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vmov.f64 d8, d0 +; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r7, r2, #1 -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: eor r0, r2, #1 -; CHECK-NEXT: sbcs r7, r3, #0 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: orr r0, r0, r3 -; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r10, r5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r10, r0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r5, r0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vmov.f32 s0, s17 -; CHECK-NEXT: movne r0, r10 -; CHECK-NEXT: mov r1, #1 -; CHECK-NEXT: moveq r0, r10 -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: movlo r1, r2 -; CHECK-NEXT: mov r8, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r11, #0 -; CHECK-NEXT: movpl r2, r8 -; CHECK-NEXT: movpl r3, r11 -; CHECK-NEXT: moveq r2, r1 -; CHECK-NEXT: rsbs r1, r2, #0 -; CHECK-NEXT: rscs r1, r3, #0 +; CHECK-NEXT: vmov.f32 s0, s16 +; CHECK-NEXT: orr r10, r0, r3 +; CHECK-NEXT: subs r0, r2, #1 ; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r10, r7 -; CHECK-NEXT: orrs r9, r2, r3 -; CHECK-NEXT: moveq r10, r0 +; CHECK-NEXT: moveq r4, r7 +; 
CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: moveq r4, r10 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movpl r5, r9 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movwmi r4, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: eor r4, r2, #1 -; CHECK-NEXT: orr r6, r4, r3 -; CHECK-NEXT: subs r4, r2, #1 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: moveq r0, r4 +; CHECK-NEXT: eor r6, r2, #1 +; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: orr r6, r6, r3 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: moveq r0, r2 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: moveq r0, r6 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movne r4, r1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r6, #1 -; CHECK-NEXT: movne r1, r0 -; CHECK-NEXT: moveq r1, r0 -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: movlo r6, r2 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: movne r7, r8 +; CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: moveq r7, r10 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movwmi r7, #0 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movmi r8, r2 -; CHECK-NEXT: movpl r3, r11 -; CHECK-NEXT: moveq r8, r6 -; CHECK-NEXT: rsbs r2, r8, #0 -; CHECK-NEXT: rscs r2, r3, #0 -; CHECK-NEXT: movwlt r11, #1 -; CHECK-NEXT: cmp r11, #0 -; CHECK-NEXT: moveq r0, r11 -; CHECK-NEXT: orrs r2, r8, r3 -; CHECK-NEXT: moveq r0, r1 -; CHECK-NEXT: cmp r11, #0 -; CHECK-NEXT: movne r11, r4 +; CHECK-NEXT: movmi r9, r3 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: movwmi r0, #0 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov.32 d1[0], r0 -; CHECK-NEXT: moveq r11, r4 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vmov.32 d0[0], r10 -; CHECK-NEXT: movne r7, r5 +; CHECK-NEXT: vmov.32 d1[0], r7 +; CHECK-NEXT: movne r2, r1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: vmov.32 d0[0], r0 +; CHECK-NEXT: moveq r2, r6 ; 
CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: vmov.32 d1[1], r11 -; CHECK-NEXT: moveq r7, r5 -; CHECK-NEXT: vmov.32 d0[1], r7 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: movwmi r2, #0 +; CHECK-NEXT: vmov.32 d0[1], r2 ; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -4549,205 +4481,130 @@ entry: define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-LABEL: ustest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: .pad #4 -; CHECK-NEON-NEXT: sub sp, sp, #4 +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: mov r7, r0 +; CHECK-NEON-NEXT: vmov r0, s16 +; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti ; CHECK-NEON-NEXT: mov r8, r0 ; CHECK-NEON-NEXT: eor r0, r2, #1 -; CHECK-NEON-NEXT: mov r5, r2 -; CHECK-NEON-NEXT: subs r2, r2, #1 -; CHECK-NEON-NEXT: sbcs r2, r3, #0 -; CHECK-NEON-NEXT: mov r4, #0 -; CHECK-NEON-NEXT: movwlt r4, #1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: orr r0, r0, r3 -; CHECK-NEON-NEXT: moveq r8, r4 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: mov r10, #1 -; CHECK-NEON-NEXT: moveq r8, r0 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: movne r4, r1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: moveq r4, r0 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: mov r7, r4 -; CHECK-NEON-NEXT: mov r0, #1 -; CHECK-NEON-NEXT: movne r7, r8 -; 
CHECK-NEON-NEXT: mov r6, r3 -; CHECK-NEON-NEXT: moveq r7, r8 -; CHECK-NEON-NEXT: cmp r5, #1 -; CHECK-NEON-NEXT: movlo r0, r5 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: movpl r5, r10 +; CHECK-NEON-NEXT: orr r10, r0, r3 +; CHECK-NEON-NEXT: subs r0, r2, #1 +; CHECK-NEON-NEXT: vmov s0, r7 +; CHECK-NEON-NEXT: sbcs r0, r3, #0 +; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: mov r4, r1 +; CHECK-NEON-NEXT: movwlt r7, #1 +; CHECK-NEON-NEXT: cmp r7, #0 +; CHECK-NEON-NEXT: moveq r4, r7 +; CHECK-NEON-NEXT: cmp r10, #0 +; CHECK-NEON-NEXT: moveq r4, r10 +; CHECK-NEON-NEXT: mov r5, r3 ; CHECK-NEON-NEXT: mov r9, #0 -; CHECK-NEON-NEXT: moveq r5, r0 -; CHECK-NEON-NEXT: movpl r6, r9 -; CHECK-NEON-NEXT: rsbs r0, r5, #0 -; CHECK-NEON-NEXT: mov r11, #0 -; CHECK-NEON-NEXT: rscs r0, r6, #0 -; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: movwlt r11, #1 -; CHECK-NEON-NEXT: cmp r11, #0 -; CHECK-NEON-NEXT: moveq r8, r11 -; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: vmov s0, r0 -; CHECK-NEON-NEXT: orrs r5, r5, r6 -; CHECK-NEON-NEXT: moveq r8, r7 +; CHECK-NEON-NEXT: cmp r3, #0 +; CHECK-NEON-NEXT: movpl r5, r9 +; CHECK-NEON-NEXT: cmp r5, #0 +; CHECK-NEON-NEXT: movwmi r4, #0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r6, r2, #1 -; CHECK-NEON-NEXT: eor r7, r2, #1 -; CHECK-NEON-NEXT: sbcs r6, r3, #0 -; CHECK-NEON-NEXT: orr r7, r7, r3 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: movwlt r6, #1 +; CHECK-NEON-NEXT: eor r6, r2, #1 +; CHECK-NEON-NEXT: subs r2, r2, #1 +; CHECK-NEON-NEXT: sbcs r2, r3, #0 +; CHECK-NEON-NEXT: orr r6, r6, r3 +; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: moveq r0, r2 ; CHECK-NEON-NEXT: cmp r6, #0 ; CHECK-NEON-NEXT: moveq r0, r6 ; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r0, r7 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: movne r6, r1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r6, r7 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: 
mov r1, r6 -; CHECK-NEON-NEXT: mov r7, #1 -; CHECK-NEON-NEXT: movne r1, r0 -; CHECK-NEON-NEXT: moveq r1, r0 -; CHECK-NEON-NEXT: cmp r2, #1 -; CHECK-NEON-NEXT: movlo r7, r2 +; CHECK-NEON-NEXT: movne r7, r8 +; CHECK-NEON-NEXT: cmp r10, #0 +; CHECK-NEON-NEXT: moveq r7, r10 +; CHECK-NEON-NEXT: cmp r5, #0 +; CHECK-NEON-NEXT: movwmi r7, #0 ; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: movmi r10, r2 -; CHECK-NEON-NEXT: movpl r3, r9 -; CHECK-NEON-NEXT: moveq r10, r7 -; CHECK-NEON-NEXT: rsbs r2, r10, #0 -; CHECK-NEON-NEXT: rscs r2, r3, #0 -; CHECK-NEON-NEXT: movwlt r9, #1 -; CHECK-NEON-NEXT: cmp r9, #0 -; CHECK-NEON-NEXT: moveq r0, r9 -; CHECK-NEON-NEXT: orrs r2, r10, r3 -; CHECK-NEON-NEXT: moveq r0, r1 +; CHECK-NEON-NEXT: movmi r9, r3 ; CHECK-NEON-NEXT: cmp r9, #0 -; CHECK-NEON-NEXT: movne r9, r6 +; CHECK-NEON-NEXT: movwmi r0, #0 ; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: vmov.32 d1[0], r0 -; CHECK-NEON-NEXT: moveq r9, r6 -; CHECK-NEON-NEXT: cmp r11, #0 -; CHECK-NEON-NEXT: vmov.32 d0[0], r8 -; CHECK-NEON-NEXT: movne r11, r4 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r9 -; CHECK-NEON-NEXT: moveq r11, r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r11 +; CHECK-NEON-NEXT: vmov.32 d1[0], r7 +; CHECK-NEON-NEXT: movne r2, r1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: vmov.32 d0[0], r0 +; CHECK-NEON-NEXT: moveq r2, r6 +; CHECK-NEON-NEXT: cmp r9, #0 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: movwmi r2, #0 +; CHECK-NEON-NEXT: vmov.32 d0[1], r2 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: add sp, sp, #4 -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; ; CHECK-FP16-LABEL: ustest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FP16-NEXT: .pad #4 -; CHECK-FP16-NEXT: sub sp, sp, #4 -; CHECK-FP16-NEXT: .vsave {d8} -; CHECK-FP16-NEXT: 
vpush {d8} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[0] -; CHECK-FP16-NEXT: vorr d8, d0, d0 +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] +; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r7, r2, #1 -; CHECK-FP16-NEXT: mov r10, r0 +; CHECK-FP16-NEXT: mov r8, r0 ; CHECK-FP16-NEXT: eor r0, r2, #1 -; CHECK-FP16-NEXT: sbcs r7, r3, #0 -; CHECK-FP16-NEXT: mov r5, #0 -; CHECK-FP16-NEXT: orr r0, r0, r3 -; CHECK-FP16-NEXT: movwlt r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: moveq r10, r5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: moveq r10, r0 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: movne r5, r1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: moveq r5, r0 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: mov r0, r5 -; CHECK-FP16-NEXT: mov r1, #1 -; CHECK-FP16-NEXT: movne r0, r10 -; CHECK-FP16-NEXT: mov r8, #1 -; CHECK-FP16-NEXT: moveq r0, r10 -; CHECK-FP16-NEXT: cmp r2, #1 -; CHECK-FP16-NEXT: movlo r1, r2 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: movpl r2, r8 -; CHECK-FP16-NEXT: mov r11, #0 -; CHECK-FP16-NEXT: moveq r2, r1 -; CHECK-FP16-NEXT: movpl r3, r11 -; CHECK-FP16-NEXT: rsbs r1, r2, #0 +; CHECK-FP16-NEXT: orr r10, r0, r3 +; CHECK-FP16-NEXT: subs r0, r2, #1 +; CHECK-FP16-NEXT: sbcs r0, r3, #0 ; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: rscs r1, r3, #0 -; CHECK-FP16-NEXT: vmov.u16 r1, d8[1] ; CHECK-FP16-NEXT: movwlt r7, #1 +; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: moveq r10, r7 -; CHECK-FP16-NEXT: orrs r9, r2, r3 -; CHECK-FP16-NEXT: moveq r10, r0 -; CHECK-FP16-NEXT: vmov s0, r1 +; CHECK-FP16-NEXT: vmov s0, r6 +; CHECK-FP16-NEXT: moveq r4, r7 +; CHECK-FP16-NEXT: cmp r10, #0 +; CHECK-FP16-NEXT: moveq r4, r10 +; CHECK-FP16-NEXT: mov r5, r3 +; CHECK-FP16-NEXT: mov r9, #0 +; CHECK-FP16-NEXT: cmp r3, #0 +; 
CHECK-FP16-NEXT: movpl r5, r9 +; CHECK-FP16-NEXT: cmp r5, #0 +; CHECK-FP16-NEXT: movwmi r4, #0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: eor r4, r2, #1 -; CHECK-FP16-NEXT: orr r6, r4, r3 -; CHECK-FP16-NEXT: subs r4, r2, #1 -; CHECK-FP16-NEXT: sbcs r4, r3, #0 -; CHECK-FP16-NEXT: mov r4, #0 -; CHECK-FP16-NEXT: movwlt r4, #1 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: moveq r0, r4 +; CHECK-FP16-NEXT: eor r6, r2, #1 +; CHECK-FP16-NEXT: subs r2, r2, #1 +; CHECK-FP16-NEXT: sbcs r2, r3, #0 +; CHECK-FP16-NEXT: orr r6, r6, r3 +; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: moveq r0, r2 ; CHECK-FP16-NEXT: cmp r6, #0 ; CHECK-FP16-NEXT: moveq r0, r6 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: movne r4, r1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r4, r6 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: mov r1, r4 -; CHECK-FP16-NEXT: mov r6, #1 -; CHECK-FP16-NEXT: movne r1, r0 -; CHECK-FP16-NEXT: moveq r1, r0 -; CHECK-FP16-NEXT: cmp r2, #1 -; CHECK-FP16-NEXT: movlo r6, r2 +; CHECK-FP16-NEXT: cmp r7, #0 +; CHECK-FP16-NEXT: movne r7, r8 +; CHECK-FP16-NEXT: cmp r10, #0 +; CHECK-FP16-NEXT: moveq r7, r10 +; CHECK-FP16-NEXT: cmp r5, #0 +; CHECK-FP16-NEXT: movwmi r7, #0 ; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: movmi r8, r2 -; CHECK-FP16-NEXT: movpl r3, r11 -; CHECK-FP16-NEXT: moveq r8, r6 -; CHECK-FP16-NEXT: rsbs r2, r8, #0 -; CHECK-FP16-NEXT: rscs r2, r3, #0 -; CHECK-FP16-NEXT: movwlt r11, #1 -; CHECK-FP16-NEXT: cmp r11, #0 -; CHECK-FP16-NEXT: moveq r0, r11 -; CHECK-FP16-NEXT: orrs r2, r8, r3 -; CHECK-FP16-NEXT: moveq r0, r1 -; CHECK-FP16-NEXT: cmp r11, #0 -; CHECK-FP16-NEXT: movne r11, r4 +; CHECK-FP16-NEXT: movmi r9, r3 +; CHECK-FP16-NEXT: cmp r9, #0 +; CHECK-FP16-NEXT: movwmi r0, #0 ; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: vmov.32 d1[0], r0 -; CHECK-FP16-NEXT: moveq r11, r4 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: vmov.32 d0[0], r10 -; 
CHECK-FP16-NEXT: movne r7, r5 +; CHECK-FP16-NEXT: vmov.32 d1[0], r7 +; CHECK-FP16-NEXT: movne r2, r1 +; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: vmov.32 d0[0], r0 +; CHECK-FP16-NEXT: moveq r2, r6 ; CHECK-FP16-NEXT: cmp r9, #0 -; CHECK-FP16-NEXT: vmov.32 d1[1], r11 -; CHECK-FP16-NEXT: moveq r7, r5 -; CHECK-FP16-NEXT: vmov.32 d0[1], r7 -; CHECK-FP16-NEXT: vpop {d8} -; CHECK-FP16-NEXT: add sp, sp, #4 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-FP16-NEXT: vmov.32 d1[1], r4 +; CHECK-FP16-NEXT: movwmi r2, #0 +; CHECK-FP16-NEXT: vmov.32 d0[1], r2 +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 6b9dc00..d467ef6 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -2100,12 +2100,9 @@ define i32 @ustest_f64i32_mm(double %x) { ; RV32IF-NEXT: .LBB29_2: # %entry ; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: and a1, a2, a1 -; RV32IF-NEXT: beqz a1, .LBB29_4 -; RV32IF-NEXT: # %bb.3: # %entry -; RV32IF-NEXT: sgtz a1, a1 -; RV32IF-NEXT: neg a1, a1 +; RV32IF-NEXT: slti a1, a1, 0 +; RV32IF-NEXT: addi a1, a1, -1 ; RV32IF-NEXT: and a0, a1, a0 -; RV32IF-NEXT: .LBB29_4: # %entry ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret @@ -2394,12 +2391,9 @@ define i32 @ustest_f16i32_mm(half %x) { ; RV32-NEXT: .LBB35_2: # %entry ; RV32-NEXT: neg a2, a2 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: beqz a1, .LBB35_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: sgtz a1, a1 -; RV32-NEXT: neg a1, a1 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: .LBB35_4: # %entry ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -3270,63 +3264,33 
@@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti@plt -; RV32IF-NEXT: lw a1, 16(sp) -; RV32IF-NEXT: lw a0, 20(sp) -; RV32IF-NEXT: li a3, 1 -; RV32IF-NEXT: mv a6, a1 -; RV32IF-NEXT: bltz a0, .LBB47_2 +; RV32IF-NEXT: lw a0, 8(sp) +; RV32IF-NEXT: lw a3, 12(sp) +; RV32IF-NEXT: lw a1, 20(sp) +; RV32IF-NEXT: lw a4, 16(sp) +; RV32IF-NEXT: slti a2, a1, 0 +; RV32IF-NEXT: beqz a1, .LBB47_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: li a6, 1 -; RV32IF-NEXT: .LBB47_2: # %entry -; RV32IF-NEXT: mv a2, a1 -; RV32IF-NEXT: bltu a1, a3, .LBB47_4 -; RV32IF-NEXT: # %bb.3: # %entry -; RV32IF-NEXT: li a2, 1 -; RV32IF-NEXT: .LBB47_4: # %entry -; RV32IF-NEXT: lw a4, 12(sp) -; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: slti a5, a0, 0 -; RV32IF-NEXT: beqz a0, .LBB47_6 -; RV32IF-NEXT: # %bb.5: # %entry -; RV32IF-NEXT: mv a2, a6 -; RV32IF-NEXT: mv a6, a5 -; RV32IF-NEXT: j .LBB47_7 -; RV32IF-NEXT: .LBB47_6: -; RV32IF-NEXT: seqz a6, a1 -; RV32IF-NEXT: .LBB47_7: # %entry -; RV32IF-NEXT: neg a6, a6 -; RV32IF-NEXT: and a3, a6, a3 -; RV32IF-NEXT: xori a1, a1, 1 -; RV32IF-NEXT: or a1, a1, a0 -; RV32IF-NEXT: seqz a1, a1 -; RV32IF-NEXT: addi a1, a1, -1 -; RV32IF-NEXT: and a3, a1, a3 -; RV32IF-NEXT: and a4, a6, a4 -; RV32IF-NEXT: and a1, a1, a4 -; RV32IF-NEXT: neg a4, a5 -; RV32IF-NEXT: and a4, a4, a0 -; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: beqz a1, .LBB47_9 -; RV32IF-NEXT: # %bb.8: # %entry -; RV32IF-NEXT: seqz a0, a1 -; RV32IF-NEXT: addi a0, a0, -1 -; RV32IF-NEXT: and a0, a0, a3 -; RV32IF-NEXT: .LBB47_9: # %entry -; RV32IF-NEXT: beqz a4, .LBB47_11 -; RV32IF-NEXT: # %bb.10: # %entry -; RV32IF-NEXT: sgtz a5, a4 -; RV32IF-NEXT: or a2, a2, a4 -; RV32IF-NEXT: bnez a2, .LBB47_12 -; RV32IF-NEXT: j .LBB47_13 -; RV32IF-NEXT: .LBB47_11: -; RV32IF-NEXT: snez a5, a2 -; RV32IF-NEXT: or a2, a2, a4 -; RV32IF-NEXT: beqz a2, .LBB47_13 -; RV32IF-NEXT: .LBB47_12: # %entry -; RV32IF-NEXT: neg a2, a5 -; RV32IF-NEXT: and a0, a2, a3 +; 
RV32IF-NEXT: mv a5, a2 +; RV32IF-NEXT: j .LBB47_3 +; RV32IF-NEXT: .LBB47_2: +; RV32IF-NEXT: seqz a5, a4 +; RV32IF-NEXT: .LBB47_3: # %entry +; RV32IF-NEXT: neg a5, a5 +; RV32IF-NEXT: and a3, a5, a3 +; RV32IF-NEXT: xori a4, a4, 1 +; RV32IF-NEXT: or a4, a4, a1 +; RV32IF-NEXT: seqz a4, a4 +; RV32IF-NEXT: addi a4, a4, -1 +; RV32IF-NEXT: and a3, a4, a3 +; RV32IF-NEXT: and a0, a5, a0 +; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: and a1, a2, a1 -; RV32IF-NEXT: .LBB47_13: # %entry +; RV32IF-NEXT: slti a1, a1, 0 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: and a0, a1, a0 +; RV32IF-NEXT: and a1, a1, a3 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -3350,12 +3314,9 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV64-NEXT: seqz a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: beqz a2, .LBB47_4 -; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: sgtz a1, a2 -; RV64-NEXT: neg a1, a1 +; RV64-NEXT: slti a1, a2, 0 +; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: .LBB47_4: # %entry ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -3368,63 +3329,33 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti@plt -; RV32IFD-NEXT: lw a1, 16(sp) -; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: li a3, 1 -; RV32IFD-NEXT: mv a6, a1 -; RV32IFD-NEXT: bltz a0, .LBB47_2 +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a3, 12(sp) +; RV32IFD-NEXT: lw a1, 20(sp) +; RV32IFD-NEXT: lw a4, 16(sp) +; RV32IFD-NEXT: slti a2, a1, 0 +; RV32IFD-NEXT: beqz a1, .LBB47_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: li a6, 1 -; RV32IFD-NEXT: .LBB47_2: # %entry -; RV32IFD-NEXT: mv a2, a1 -; RV32IFD-NEXT: bltu a1, a3, .LBB47_4 -; RV32IFD-NEXT: # %bb.3: # %entry -; RV32IFD-NEXT: li a2, 1 -; RV32IFD-NEXT: .LBB47_4: # %entry -; RV32IFD-NEXT: lw a4, 
12(sp) -; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: slti a5, a0, 0 -; RV32IFD-NEXT: beqz a0, .LBB47_6 -; RV32IFD-NEXT: # %bb.5: # %entry -; RV32IFD-NEXT: mv a2, a6 -; RV32IFD-NEXT: mv a6, a5 -; RV32IFD-NEXT: j .LBB47_7 -; RV32IFD-NEXT: .LBB47_6: -; RV32IFD-NEXT: seqz a6, a1 -; RV32IFD-NEXT: .LBB47_7: # %entry -; RV32IFD-NEXT: neg a6, a6 -; RV32IFD-NEXT: and a3, a6, a3 -; RV32IFD-NEXT: xori a1, a1, 1 -; RV32IFD-NEXT: or a1, a1, a0 -; RV32IFD-NEXT: seqz a1, a1 -; RV32IFD-NEXT: addi a1, a1, -1 -; RV32IFD-NEXT: and a3, a1, a3 -; RV32IFD-NEXT: and a4, a6, a4 -; RV32IFD-NEXT: and a1, a1, a4 -; RV32IFD-NEXT: neg a4, a5 -; RV32IFD-NEXT: and a4, a4, a0 -; RV32IFD-NEXT: mv a0, a3 -; RV32IFD-NEXT: beqz a1, .LBB47_9 -; RV32IFD-NEXT: # %bb.8: # %entry -; RV32IFD-NEXT: seqz a0, a1 -; RV32IFD-NEXT: addi a0, a0, -1 -; RV32IFD-NEXT: and a0, a0, a3 -; RV32IFD-NEXT: .LBB47_9: # %entry -; RV32IFD-NEXT: beqz a4, .LBB47_11 -; RV32IFD-NEXT: # %bb.10: # %entry -; RV32IFD-NEXT: sgtz a5, a4 -; RV32IFD-NEXT: or a2, a2, a4 -; RV32IFD-NEXT: bnez a2, .LBB47_12 -; RV32IFD-NEXT: j .LBB47_13 -; RV32IFD-NEXT: .LBB47_11: -; RV32IFD-NEXT: snez a5, a2 -; RV32IFD-NEXT: or a2, a2, a4 -; RV32IFD-NEXT: beqz a2, .LBB47_13 -; RV32IFD-NEXT: .LBB47_12: # %entry -; RV32IFD-NEXT: neg a2, a5 -; RV32IFD-NEXT: and a0, a2, a3 +; RV32IFD-NEXT: mv a5, a2 +; RV32IFD-NEXT: j .LBB47_3 +; RV32IFD-NEXT: .LBB47_2: +; RV32IFD-NEXT: seqz a5, a4 +; RV32IFD-NEXT: .LBB47_3: # %entry +; RV32IFD-NEXT: neg a5, a5 +; RV32IFD-NEXT: and a3, a5, a3 +; RV32IFD-NEXT: xori a4, a4, 1 +; RV32IFD-NEXT: or a4, a4, a1 +; RV32IFD-NEXT: seqz a4, a4 +; RV32IFD-NEXT: addi a4, a4, -1 +; RV32IFD-NEXT: and a3, a4, a3 +; RV32IFD-NEXT: and a0, a5, a0 +; RV32IFD-NEXT: and a0, a4, a0 +; RV32IFD-NEXT: neg a2, a2 ; RV32IFD-NEXT: and a1, a2, a1 -; RV32IFD-NEXT: .LBB47_13: # %entry +; RV32IFD-NEXT: slti a1, a1, 0 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 +; RV32IFD-NEXT: and a1, a1, a3 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded 
Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -3604,63 +3535,33 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: li a3, 1 -; RV32-NEXT: mv a6, a1 -; RV32-NEXT: bltz a0, .LBB50_2 +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: lw a3, 12(sp) +; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: lw a4, 16(sp) +; RV32-NEXT: slti a2, a1, 0 +; RV32-NEXT: beqz a1, .LBB50_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a6, 1 -; RV32-NEXT: .LBB50_2: # %entry -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB50_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: li a2, 1 -; RV32-NEXT: .LBB50_4: # %entry -; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: slti a5, a0, 0 -; RV32-NEXT: beqz a0, .LBB50_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: mv a2, a6 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: j .LBB50_7 -; RV32-NEXT: .LBB50_6: -; RV32-NEXT: seqz a6, a1 -; RV32-NEXT: .LBB50_7: # %entry -; RV32-NEXT: neg a6, a6 -; RV32-NEXT: and a3, a6, a3 -; RV32-NEXT: xori a1, a1, 1 -; RV32-NEXT: or a1, a1, a0 -; RV32-NEXT: seqz a1, a1 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a3, a1, a3 -; RV32-NEXT: and a4, a6, a4 -; RV32-NEXT: and a1, a1, a4 -; RV32-NEXT: neg a4, a5 -; RV32-NEXT: and a4, a4, a0 -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: beqz a1, .LBB50_9 -; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: seqz a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: .LBB50_9: # %entry -; RV32-NEXT: beqz a4, .LBB50_11 -; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: sgtz a5, a4 -; RV32-NEXT: or a2, a2, a4 -; RV32-NEXT: bnez a2, .LBB50_12 -; RV32-NEXT: j .LBB50_13 -; RV32-NEXT: .LBB50_11: -; RV32-NEXT: snez a5, a2 -; RV32-NEXT: or a2, a2, a4 -; RV32-NEXT: beqz a2, .LBB50_13 -; RV32-NEXT: .LBB50_12: # %entry -; RV32-NEXT: neg a2, a5 -; RV32-NEXT: and a0, a2, a3 +; RV32-NEXT: mv a5, a2 +; RV32-NEXT: j 
.LBB50_3 +; RV32-NEXT: .LBB50_2: +; RV32-NEXT: seqz a5, a4 +; RV32-NEXT: .LBB50_3: # %entry +; RV32-NEXT: neg a5, a5 +; RV32-NEXT: and a3, a5, a3 +; RV32-NEXT: xori a4, a4, 1 +; RV32-NEXT: or a4, a4, a1 +; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: and a0, a5, a0 +; RV32-NEXT: and a0, a4, a0 +; RV32-NEXT: neg a2, a2 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: .LBB50_13: # %entry +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: and a1, a1, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -3684,12 +3585,9 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV64-NEXT: seqz a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: beqz a2, .LBB50_4 -; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: sgtz a1, a2 -; RV64-NEXT: neg a1, a1 +; RV64-NEXT: slti a1, a2, 0 +; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: .LBB50_4: # %entry ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -3918,63 +3816,33 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2@plt ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: li a3, 1 -; RV32-NEXT: mv a6, a1 -; RV32-NEXT: bltz a0, .LBB53_2 +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: lw a3, 12(sp) +; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: lw a4, 16(sp) +; RV32-NEXT: slti a2, a1, 0 +; RV32-NEXT: beqz a1, .LBB53_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a6, 1 -; RV32-NEXT: .LBB53_2: # %entry -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB53_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: li a2, 1 -; RV32-NEXT: .LBB53_4: # %entry -; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: slti a5, a0, 0 -; RV32-NEXT: beqz a0, .LBB53_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: mv a2, a6 -; 
RV32-NEXT: mv a6, a5 -; RV32-NEXT: j .LBB53_7 -; RV32-NEXT: .LBB53_6: -; RV32-NEXT: seqz a6, a1 -; RV32-NEXT: .LBB53_7: # %entry -; RV32-NEXT: neg a6, a6 -; RV32-NEXT: and a3, a6, a3 -; RV32-NEXT: xori a1, a1, 1 -; RV32-NEXT: or a1, a1, a0 -; RV32-NEXT: seqz a1, a1 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a3, a1, a3 -; RV32-NEXT: and a4, a6, a4 -; RV32-NEXT: and a1, a1, a4 -; RV32-NEXT: neg a4, a5 -; RV32-NEXT: and a4, a4, a0 -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: beqz a1, .LBB53_9 -; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: seqz a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: .LBB53_9: # %entry -; RV32-NEXT: beqz a4, .LBB53_11 -; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: sgtz a5, a4 -; RV32-NEXT: or a2, a2, a4 -; RV32-NEXT: bnez a2, .LBB53_12 -; RV32-NEXT: j .LBB53_13 -; RV32-NEXT: .LBB53_11: -; RV32-NEXT: snez a5, a2 -; RV32-NEXT: or a2, a2, a4 -; RV32-NEXT: beqz a2, .LBB53_13 -; RV32-NEXT: .LBB53_12: # %entry -; RV32-NEXT: neg a2, a5 -; RV32-NEXT: and a0, a2, a3 +; RV32-NEXT: mv a5, a2 +; RV32-NEXT: j .LBB53_3 +; RV32-NEXT: .LBB53_2: +; RV32-NEXT: seqz a5, a4 +; RV32-NEXT: .LBB53_3: # %entry +; RV32-NEXT: neg a5, a5 +; RV32-NEXT: and a3, a5, a3 +; RV32-NEXT: xori a4, a4, 1 +; RV32-NEXT: or a4, a4, a1 +; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: and a0, a5, a0 +; RV32-NEXT: and a0, a4, a0 +; RV32-NEXT: neg a2, a2 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: .LBB53_13: # %entry +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: and a1, a1, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -4000,12 +3868,9 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV64-NEXT: seqz a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: beqz a2, .LBB53_4 -; RV64-NEXT: # %bb.3: # %entry -; RV64-NEXT: sgtz a1, a2 -; RV64-NEXT: neg a1, a1 +; RV64-NEXT: slti a1, a2, 0 +; 
RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: .LBB53_4: # %entry ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/min-max.ll b/llvm/test/CodeGen/RISCV/min-max.ll index 4a1039a..2a88807 100644 --- a/llvm/test/CodeGen/RISCV/min-max.ll +++ b/llvm/test/CodeGen/RISCV/min-max.ll @@ -682,13 +682,11 @@ define signext i32 @smin_i32_negone(i32 signext %a) { define i64 @smin_i64_negone(i64 %a) { ; RV32I-LABEL: smin_i64_negone: ; RV32I: # %bb.0: -; RV32I-NEXT: slti a2, a1, -1 -; RV32I-NEXT: li a3, -1 +; RV32I-NEXT: slti a2, a1, 0 ; RV32I-NEXT: addi a2, a2, -1 -; RV32I-NEXT: beq a1, a3, .LBB27_2 -; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: .LBB27_2: +; RV32I-NEXT: slti a2, a1, -1 +; RV32I-NEXT: addi a2, a2, -1 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret ; @@ -702,13 +700,11 @@ define i64 @smin_i64_negone(i64 %a) { ; RV32ZBB-LABEL: smin_i64_negone: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: li a2, -1 -; RV32ZBB-NEXT: beq a1, a2, .LBB27_2 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slti a3, a1, -1 -; RV32ZBB-NEXT: addi a3, a3, -1 -; RV32ZBB-NEXT: or a0, a3, a0 -; RV32ZBB-NEXT: .LBB27_2: -; RV32ZBB-NEXT: min a1, a1, a2 +; RV32ZBB-NEXT: min a2, a1, a2 +; RV32ZBB-NEXT: slti a1, a1, 0 +; RV32ZBB-NEXT: addi a1, a1, -1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: mv a1, a2 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: smin_i64_negone: diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index 22a3776..c54ca19 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -5755,37 +5755,31 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB47_2: # %entry -; CHECK-NOV-NEXT: mv a4, s1 +; CHECK-NOV-NEXT: mv a3, s1 ; CHECK-NOV-NEXT: blez s1, .LBB47_4 ; CHECK-NOV-NEXT: # 
%bb.3: # %entry -; CHECK-NOV-NEXT: li a4, 1 +; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB47_4: # %entry -; CHECK-NOV-NEXT: slti a3, a1, 1 -; CHECK-NOV-NEXT: neg a3, a3 -; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: slti a4, a1, 1 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: seqz a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, a0 ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: addi s1, s1, -1 -; CHECK-NOV-NEXT: seqz a5, s1 -; CHECK-NOV-NEXT: addi a5, a5, -1 -; CHECK-NOV-NEXT: and a0, a5, a0 -; CHECK-NOV-NEXT: beqz a4, .LBB47_6 -; CHECK-NOV-NEXT: # %bb.5: # %entry -; CHECK-NOV-NEXT: sgtz a4, a4 -; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: seqz a4, s1 +; CHECK-NOV-NEXT: addi a4, a4, -1 ; CHECK-NOV-NEXT: and a0, a4, a0 -; CHECK-NOV-NEXT: .LBB47_6: # %entry -; CHECK-NOV-NEXT: and a1, a1, a3 -; CHECK-NOV-NEXT: beqz a2, .LBB47_8 -; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: sgtz a2, a2 -; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: slti a3, a3, 0 +; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: and a0, a3, a0 +; CHECK-NOV-NEXT: slti a2, a2, 0 +; CHECK-NOV-NEXT: addi a2, a2, -1 ; CHECK-NOV-NEXT: and a1, a2, a1 -; CHECK-NOV-NEXT: .LBB47_8: # %entry ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -5831,33 +5825,27 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: addi a4, s0, -1 ; CHECK-V-NEXT: seqz a4, a4 ; CHECK-V-NEXT: addi a4, a4, -1 -; CHECK-V-NEXT: slti a5, a1, 1 -; CHECK-V-NEXT: neg a5, a5 +; CHECK-V-NEXT: and a3, a4, a3 +; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: neg a4, a4 +; CHECK-V-NEXT: and a0, a4, a0 ; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: seqz a6, a1 +; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: addi a1, a1, -1 +; 
CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: blez s0, .LBB47_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li s0, 1 ; CHECK-V-NEXT: .LBB47_4: # %entry -; CHECK-V-NEXT: and a1, a5, a0 -; CHECK-V-NEXT: addi a5, a6, -1 -; CHECK-V-NEXT: and a0, a4, a3 -; CHECK-V-NEXT: beqz s0, .LBB47_6 -; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: sgtz a3, s0 -; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: and a0, a3, a0 -; CHECK-V-NEXT: .LBB47_6: # %entry -; CHECK-V-NEXT: and a1, a5, a1 -; CHECK-V-NEXT: beqz a2, .LBB47_8 -; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: sgtz a2, a2 -; CHECK-V-NEXT: neg a2, a2 -; CHECK-V-NEXT: and a1, a2, a1 -; CHECK-V-NEXT: .LBB47_8: # %entry +; CHECK-V-NEXT: slti a1, s0, 0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a1, a1, a3 +; CHECK-V-NEXT: slti a2, a2, 0 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a0, a2, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a1 -; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a1 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -6226,37 +6214,31 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB50_2: # %entry -; CHECK-NOV-NEXT: mv a4, s1 +; CHECK-NOV-NEXT: mv a3, s1 ; CHECK-NOV-NEXT: blez s1, .LBB50_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li a4, 1 +; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB50_4: # %entry -; CHECK-NOV-NEXT: slti a3, a1, 1 -; CHECK-NOV-NEXT: neg a3, a3 -; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: slti a4, a1, 1 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: seqz a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, a0 ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: addi s1, 
s1, -1 -; CHECK-NOV-NEXT: seqz a5, s1 -; CHECK-NOV-NEXT: addi a5, a5, -1 -; CHECK-NOV-NEXT: and a0, a5, a0 -; CHECK-NOV-NEXT: beqz a4, .LBB50_6 -; CHECK-NOV-NEXT: # %bb.5: # %entry -; CHECK-NOV-NEXT: sgtz a4, a4 -; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: seqz a4, s1 +; CHECK-NOV-NEXT: addi a4, a4, -1 ; CHECK-NOV-NEXT: and a0, a4, a0 -; CHECK-NOV-NEXT: .LBB50_6: # %entry -; CHECK-NOV-NEXT: and a1, a1, a3 -; CHECK-NOV-NEXT: beqz a2, .LBB50_8 -; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: sgtz a2, a2 -; CHECK-NOV-NEXT: neg a2, a2 +; CHECK-NOV-NEXT: slti a3, a3, 0 +; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: and a0, a3, a0 +; CHECK-NOV-NEXT: slti a2, a2, 0 +; CHECK-NOV-NEXT: addi a2, a2, -1 ; CHECK-NOV-NEXT: and a1, a2, a1 -; CHECK-NOV-NEXT: .LBB50_8: # %entry ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -6302,33 +6284,27 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: addi a4, s0, -1 ; CHECK-V-NEXT: seqz a4, a4 ; CHECK-V-NEXT: addi a4, a4, -1 -; CHECK-V-NEXT: slti a5, a1, 1 -; CHECK-V-NEXT: neg a5, a5 +; CHECK-V-NEXT: and a3, a4, a3 +; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: neg a4, a4 +; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: seqz a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: seqz a6, a1 +; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: blez s0, .LBB50_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li s0, 1 ; CHECK-V-NEXT: .LBB50_4: # %entry -; CHECK-V-NEXT: and a1, a5, a0 -; CHECK-V-NEXT: addi a5, a6, -1 -; CHECK-V-NEXT: and a0, a4, a3 -; CHECK-V-NEXT: beqz s0, .LBB50_6 -; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: sgtz a3, s0 -; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: and a0, a3, a0 -; CHECK-V-NEXT: .LBB50_6: # %entry -; CHECK-V-NEXT: and a1, a5, a1 -; CHECK-V-NEXT: beqz a2, .LBB50_8 -; CHECK-V-NEXT: # %bb.7: # %entry -; 
CHECK-V-NEXT: sgtz a2, a2 -; CHECK-V-NEXT: neg a2, a2 -; CHECK-V-NEXT: and a1, a2, a1 -; CHECK-V-NEXT: .LBB50_8: # %entry +; CHECK-V-NEXT: slti a1, s0, 0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a1, a1, a3 +; CHECK-V-NEXT: slti a2, a2, 0 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a0, a2, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a1 -; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a1 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -6686,37 +6662,31 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB53_2: # %entry -; CHECK-NOV-NEXT: mv a4, s1 +; CHECK-NOV-NEXT: mv a3, s1 ; CHECK-NOV-NEXT: blez s1, .LBB53_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li a4, 1 +; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB53_4: # %entry -; CHECK-NOV-NEXT: slti a3, a1, 1 -; CHECK-NOV-NEXT: neg a3, a3 -; CHECK-NOV-NEXT: and a3, a3, a0 +; CHECK-NOV-NEXT: slti a4, a1, 1 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: seqz a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 +; CHECK-NOV-NEXT: and a1, a1, a0 ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: addi s1, s1, -1 -; CHECK-NOV-NEXT: seqz a5, s1 -; CHECK-NOV-NEXT: addi a5, a5, -1 -; CHECK-NOV-NEXT: and a0, a5, a0 -; CHECK-NOV-NEXT: beqz a4, .LBB53_6 -; CHECK-NOV-NEXT: # %bb.5: # %entry -; CHECK-NOV-NEXT: sgtz a4, a4 -; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: seqz a4, s1 +; CHECK-NOV-NEXT: addi a4, a4, -1 ; CHECK-NOV-NEXT: and a0, a4, a0 -; CHECK-NOV-NEXT: .LBB53_6: # %entry -; CHECK-NOV-NEXT: and a1, a1, a3 -; CHECK-NOV-NEXT: beqz a2, .LBB53_8 -; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: sgtz a2, a2 -; CHECK-NOV-NEXT: neg a2, a2 +; 
CHECK-NOV-NEXT: slti a3, a3, 0 +; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: and a0, a3, a0 +; CHECK-NOV-NEXT: slti a2, a2, 0 +; CHECK-NOV-NEXT: addi a2, a2, -1 ; CHECK-NOV-NEXT: and a1, a2, a1 -; CHECK-NOV-NEXT: .LBB53_8: # %entry ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -6749,40 +6719,34 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB53_2: # %entry -; CHECK-V-NEXT: mv a4, s1 +; CHECK-V-NEXT: mv a3, s1 ; CHECK-V-NEXT: blez s1, .LBB53_4 ; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li a4, 1 +; CHECK-V-NEXT: li a3, 1 ; CHECK-V-NEXT: .LBB53_4: # %entry -; CHECK-V-NEXT: slti a3, a1, 1 -; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: and a3, a3, a0 +; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: neg a4, a4 +; CHECK-V-NEXT: and a0, a4, a0 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: seqz a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: slti a0, s1, 1 -; CHECK-V-NEXT: neg a0, a0 -; CHECK-V-NEXT: and a0, a0, s0 +; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: slti a1, s1, 1 +; CHECK-V-NEXT: neg a1, a1 +; CHECK-V-NEXT: and a1, a1, s0 ; CHECK-V-NEXT: addi s1, s1, -1 -; CHECK-V-NEXT: seqz a5, s1 -; CHECK-V-NEXT: addi a5, a5, -1 -; CHECK-V-NEXT: and a0, a5, a0 -; CHECK-V-NEXT: beqz a4, .LBB53_6 -; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: sgtz a4, a4 -; CHECK-V-NEXT: neg a4, a4 -; CHECK-V-NEXT: and a0, a4, a0 -; CHECK-V-NEXT: .LBB53_6: # %entry -; CHECK-V-NEXT: and a1, a1, a3 -; CHECK-V-NEXT: beqz a2, .LBB53_8 -; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: sgtz a2, a2 -; CHECK-V-NEXT: neg a2, a2 -; CHECK-V-NEXT: and a1, a2, a1 -; CHECK-V-NEXT: .LBB53_8: # %entry +; CHECK-V-NEXT: seqz a4, s1 +; CHECK-V-NEXT: addi a4, a4, -1 +; CHECK-V-NEXT: and a1, a4, a1 +; CHECK-V-NEXT: slti a3, a3, 0 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: 
and a1, a3, a1 +; CHECK-V-NEXT: slti a2, a2, 0 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a0, a2, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a1 -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, a1 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index d3297d2..d4c5b47 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -1977,90 +1977,64 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r2, #1 -; CHECK-NEXT: eor r7, r2, #1 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: orr.w r7, r7, r3 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: mov.w r10, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mov.w r11, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r1, r1, r6, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r4, r1, r7, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r1, r0, r4, ne -; CHECK-NEXT: csel r7, r0, r1, eq -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: csel r1, r2, r10, lo -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r2, r2, r10, mi -; CHECK-NEXT: csel r1, r1, r2, eq -; CHECK-NEXT: csel 
r2, r3, r11, mi -; CHECK-NEXT: rsbs r3, r1, #0 -; CHECK-NEXT: sbcs.w r3, r11, r2 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r0, r6, ne -; CHECK-NEXT: orrs.w r9, r1, r2 -; CHECK-NEXT: vmov r0, r1, d8 -; CHECK-NEXT: csel r8, r7, r3, eq -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r5, r2, #1 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: vmov r12, r1, d8 ; CHECK-NEXT: eor r7, r2, #1 -; CHECK-NEXT: sbcs r5, r3, #0 +; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: orr.w r7, r7, r3 ; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: csel r0, r0, r5, ne ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r1, r1, r7, ne -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r7, r0, r1, ne -; CHECK-NEXT: csel r7, r0, r7, eq -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: csel r5, r2, r10, lo +; CHECK-NEXT: csel r6, r0, r7, ne ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r2, r2, r10, mi -; CHECK-NEXT: csel r3, r3, r11, mi -; CHECK-NEXT: csel r2, r5, r2, eq -; CHECK-NEXT: rsbs r5, r2, #0 -; CHECK-NEXT: sbcs.w r5, r11, r3 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: csel r0, r7, r0, eq -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r4, r6, ne -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: csel r3, r4, r3, eq +; CHECK-NEXT: csel r10, r3, r9, mi +; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r6, #0 +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: eor r4, r2, #1 +; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: orr.w r4, r4, r3 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r0, r4, ne +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r3, r3, r9, mi +; 
CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r0, #0 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r7, r1, r5, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 +; CHECK-NEXT: csel r5, r8, r5, ne +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csel r7, r5, r7, ne +; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r7, #0 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r7, eq -; CHECK-NEXT: vmov q0[2], q0[0], r0, r8 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 +; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r1, r1, r4, ne +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r1, #0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2235,82 +2209,56 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: vmov r10, r0, d0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r6, r2, #1 -; CHECK-NEXT: eor r7, r2, #1 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: orr.w r7, r7, r3 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: mov.w r11, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mov.w r8, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r1, r1, r6, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r5, r1, r7, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r1, r0, r5, ne -; CHECK-NEXT: csel r1, r0, r1, eq -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: csel r7, r2, r11, lo -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r2, r2, r11, mi -; CHECK-NEXT: csel r3, r3, r8, mi -; CHECK-NEXT: csel r2, 
r7, r2, eq -; CHECK-NEXT: rsbs r7, r2, #0 -; CHECK-NEXT: sbcs.w r7, r8, r3 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: orrs.w r9, r2, r3 -; CHECK-NEXT: csel r0, r1, r0, eq -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: vmov r9, r0, d0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r4, r2, #1 -; CHECK-NEXT: eor r6, r2, #1 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: orr.w r6, r6, r3 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: eor r1, r2, #1 +; CHECK-NEXT: orr.w r7, r1, r3 +; CHECK-NEXT: subs r1, r2, #1 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov.w r10, #0 ; CHECK-NEXT: cset r4, lt ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r1, r1, r6, ne -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r6, r0, r1, ne -; CHECK-NEXT: csel r6, r0, r6, eq -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: csel r4, r2, r11, lo +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csel r6, r0, r7, ne ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r2, r2, r11, mi -; CHECK-NEXT: csel r3, r3, r8, mi -; CHECK-NEXT: csel r2, r4, r2, eq -; CHECK-NEXT: rsbs r4, r2, #0 -; CHECK-NEXT: sbcs.w r4, r8, r3 -; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: csel r11, r3, r10, mi +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r6, #0 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: eor r5, r2, #1 +; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: orr.w r5, r5, r3 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csel r0, r0, r5, ne +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r3, r3, r10, mi +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r0, #0 ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, 
r4, ne -; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: csel r0, r6, r0, eq +; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 +; CHECK-NEXT: csel r4, r8, r4, ne ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r3, r5, r7, ne -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: csel r3, r5, r3, eq -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r7, r1, r4, ne +; CHECK-NEXT: csel r7, r4, r7, ne +; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r7, #0 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload -; CHECK-NEXT: csel r1, r1, r7, eq -; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 +; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csel r1, r1, r5, ne +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r1, #0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: @@ -2469,90 +2417,63 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: subs r6, r2, #1 -; CHECK-NEXT: eor r7, r2, #1 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: orr.w r7, r7, r3 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: mov.w r10, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mov.w r11, #0 -; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: eor r1, r2, #1 +; CHECK-NEXT: orr.w r6, r1, r3 +; CHECK-NEXT: subs r1, r2, #1 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: cset r7, lt ; 
CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: csel r0, r0, r7, ne ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r1, r1, r6, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r4, r1, r7, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r1, r0, r4, ne -; CHECK-NEXT: csel r1, r0, r1, eq -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: csel r6, r2, r10, lo +; CHECK-NEXT: csel r5, r0, r6, ne ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r2, r2, r10, mi -; CHECK-NEXT: csel r3, r3, r11, mi -; CHECK-NEXT: csel r2, r6, r2, eq -; CHECK-NEXT: rsbs r6, r2, #0 -; CHECK-NEXT: sbcs.w r6, r11, r3 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: orrs.w r9, r2, r3 -; CHECK-NEXT: csel r8, r1, r0, eq +; CHECK-NEXT: csel r10, r3, r9, mi ; CHECK-NEXT: vmov.u16 r0, q4[0] +; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r5, #0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: subs r5, r2, #1 -; CHECK-NEXT: eor r7, r2, #1 -; CHECK-NEXT: sbcs r5, r3, #0 -; CHECK-NEXT: orr.w r7, r7, r3 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r1, r1, r7, ne -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r7, r0, r1, ne -; CHECK-NEXT: csel r7, r0, r7, eq -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: csel r5, r2, r10, lo +; CHECK-NEXT: eor r4, r2, #1 +; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: orr.w r4, r4, r3 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r0, r0, r2, ne +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r0, r4, ne ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r2, r2, r10, mi -; CHECK-NEXT: csel r3, r3, r11, mi -; CHECK-NEXT: csel r2, r5, r2, eq -; CHECK-NEXT: rsbs r5, r2, #0 -; CHECK-NEXT: sbcs.w r5, r11, r3 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, 
r5, ne -; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: csel r0, r7, r0, eq +; CHECK-NEXT: csel r3, r3, r9, mi +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r0, #0 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: csel r7, r8, r7, ne ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r4, r6, ne -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: csel r3, r4, r3, eq -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r7, r1, r5, ne +; CHECK-NEXT: csel r7, r7, r6, ne +; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r7, #0 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r7, eq -; CHECK-NEXT: vmov q0[2], q0[0], r0, r8 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 +; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r1, r1, r4, ne +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r1, #0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) diff --git a/llvm/test/CodeGen/VE/Scalar/smax.ll b/llvm/test/CodeGen/VE/Scalar/smax.ll index 93637d4..0f551e1 100644 --- a/llvm/test/CodeGen/VE/Scalar/smax.ll +++ b/llvm/test/CodeGen/VE/Scalar/smax.ll @@ -131,11 +131,8 @@ define i64 @func_smax_fore_zero_i64(i64 noundef %0) { define i128 @func_smax_fore_zero_i128(i128 noundef %0) { ; CHECK-LABEL: func_smax_fore_zero_i128: ; CHECK: # %bb.0: -; CHECK-NEXT: or %s2, 0, %s0 -; CHECK-NEXT: cmov.l.le %s2, (0)1, %s1 -; CHECK-NEXT: cmov.l.eq %s2, %s0, %s1 +; CHECK-NEXT: cmov.l.lt %s0, (0)1, %s1 ; CHECK-NEXT: maxs.l %s1, 0, %s1 -; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call i128 @llvm.smax.i128(i128 %0, i128 0) ret i128 %2 @@ -188,11 +185,8 @@ define i64 @func_smax_back_zero_i64(i64 
noundef %0) { define i128 @func_smax_back_zero_i128(i128 noundef %0) { ; CHECK-LABEL: func_smax_back_zero_i128: ; CHECK: # %bb.0: -; CHECK-NEXT: or %s2, 0, %s0 -; CHECK-NEXT: cmov.l.le %s2, (0)1, %s1 -; CHECK-NEXT: cmov.l.eq %s2, %s0, %s1 +; CHECK-NEXT: cmov.l.lt %s0, (0)1, %s1 ; CHECK-NEXT: maxs.l %s1, 0, %s1 -; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call i128 @llvm.smax.i128(i128 %0, i128 0) ret i128 %2 diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll index 9a39c62..531b0d3 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -678,6 +678,7 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -689,19 +690,12 @@ define i64 @ustest_f64i64_cse_combine(double %x) #0 { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: @@ -868,6 +862,7 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -879,19 +874,12 @@ define i64 @ustest_f32i64_cse_combine(float %x) #0 { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: local.get 3 -; 
CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: @@ -1553,6 +1541,7 @@ define i64 @ustest_f64i64_mm(double %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -1564,19 +1553,12 @@ define i64 @ustest_f64i64_mm(double %x) { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: @@ -1671,6 +1653,7 @@ define i64 @ustest_f32i64_mm(float %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -1682,19 +1665,12 @@ define i64 @ustest_f32i64_mm(float %x) { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: # 
fallthrough-return entry: @@ -1795,6 +1771,7 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -1806,19 +1783,12 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index 007802d..6577754 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -2382,6 +2382,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 @@ -2393,22 +2394,16 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 4 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; 
CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -2420,19 +2415,12 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return @@ -2684,6 +2672,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 @@ -2695,22 +2684,16 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 4 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 @@ -2722,19 +2705,12 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64.select -; CHECK-NEXT: 
local.tee 2 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return @@ -2992,6 +2968,7 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 6 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 @@ -3003,22 +2980,16 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 5 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 @@ -3030,19 +3001,12 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: i64.eq ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 ; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.eqz +; CHECK-NEXT: i64.lt_s ; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return -- 2.7.4