}
// Function to calculate whether the Min/Max pair of SDNodes (potentially
-// swapped around) make a signed saturate pattern, clamping to between -2^(BW-1)
-// and 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp
-// in BW. Should work with both SMIN/SMAX nodes and setcc/select combo. The
-// operands are the same as SimplifySelectCC. N0<N1 ? N2 : N3
+// swapped around) make a signed saturate pattern, clamping to between a signed
+// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
+// Returns the node being clamped and the bitwidth of the clamp in BW. Should
+// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
+// same as SimplifySelectCC. N0<N1 ? N2 : N3.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
- SDValue N3, ISD::CondCode CC, unsigned &BW) {
+ SDValue N3, ISD::CondCode CC, unsigned &BW,
+ bool &Unsigned) {
auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
// The compare and select operand should be the same or the select operands
const APInt &MinC = MinCOp->getAPIntValue();
const APInt &MaxC = MaxCOp->getAPIntValue();
APInt MinCPlus1 = MinC + 1;
- if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2())
- return SDValue();
- BW = MinCPlus1.exactLogBase2() + 1;
- return N02;
+ if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
+ BW = MinCPlus1.exactLogBase2() + 1;
+ Unsigned = false;
+ return N02;
+ }
+
+ if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
+ BW = MinCPlus1.exactLogBase2();
+ Unsigned = true;
+ return N02;
+ }
+
+ return SDValue();
}
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC,
SelectionDAG &DAG) {
unsigned BW;
- SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW);
+ bool Unsigned;
+ SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
return SDValue();
EVT FPVT = Fp.getOperand(0).getValueType();
if (FPVT.isVector())
NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
FPVT.getVectorElementCount());
- if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(
- ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT))
+ unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
+ if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
return SDValue();
SDLoc DL(Fp);
- SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0),
+ SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
DAG.getValueType(NewVT.getScalarType()));
- return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
+ return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
+ : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
}
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
bool shouldSplatInsEltVarIndex(EVT VT) const override;
+ bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
+ // Converting to sat variants holds little benefit on X86 as we will just
+  // need to saturate the value back using fp arithmetic.
+ return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
+ }
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
define i32 @ustest_f64i32(double %x) {
; CHECK-LABEL: ustest_f64i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: mov w9, #-1
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x8, x8, x9, lt
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: csel w0, w8, wzr, gt
+; CHECK-NEXT: fcvtzu w0, d0
; CHECK-NEXT: ret
entry:
%conv = fptosi double %x to i64
define i32 @ustest_f32i32(float %x) {
; CHECK-LABEL: ustest_f32i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: mov w9, #-1
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x8, x8, x9, lt
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: csel w0, w8, wzr, gt
+; CHECK-NEXT: fcvtzu w0, s0
; CHECK-NEXT: ret
entry:
%conv = fptosi float %x to i64
; CHECK-CVT-LABEL: ustest_f16i32:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #-1
-; CHECK-CVT-NEXT: fcvtzs x8, s0
-; CHECK-CVT-NEXT: cmp x8, x9
-; CHECK-CVT-NEXT: csel x8, x8, x9, lt
-; CHECK-CVT-NEXT: cmp x8, #0
-; CHECK-CVT-NEXT: csel w0, w8, wzr, gt
+; CHECK-CVT-NEXT: fcvtzu w0, s0
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: ustest_f16i32:
; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: fcvtzs x8, h0
-; CHECK-FP16-NEXT: mov w9, #-1
-; CHECK-FP16-NEXT: cmp x8, x9
-; CHECK-FP16-NEXT: csel x8, x8, x9, lt
-; CHECK-FP16-NEXT: cmp x8, #0
-; CHECK-FP16-NEXT: csel w0, w8, wzr, gt
+; CHECK-FP16-NEXT: fcvtzu w0, h0
; CHECK-FP16-NEXT: ret
entry:
%conv = fptosi half %x to i64
define i32 @ustest_f64i32_mm(double %x) {
; CHECK-LABEL: ustest_f64i32_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: mov w9, #-1
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x8, x8, x9, lt
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: csel x0, x8, xzr, gt
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: fcvtzu w0, d0
; CHECK-NEXT: ret
entry:
%conv = fptosi double %x to i64
define i32 @ustest_f32i32_mm(float %x) {
; CHECK-LABEL: ustest_f32i32_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: mov w9, #-1
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x8, x8, x9, lt
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: csel x0, x8, xzr, gt
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: fcvtzu w0, s0
; CHECK-NEXT: ret
entry:
%conv = fptosi float %x to i64
; CHECK-CVT-LABEL: ustest_f16i32_mm:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #-1
-; CHECK-CVT-NEXT: fcvtzs x8, s0
-; CHECK-CVT-NEXT: cmp x8, x9
-; CHECK-CVT-NEXT: csel x8, x8, x9, lt
-; CHECK-CVT-NEXT: cmp x8, #0
-; CHECK-CVT-NEXT: csel x0, x8, xzr, gt
-; CHECK-CVT-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-CVT-NEXT: fcvtzu w0, s0
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: ustest_f16i32_mm:
; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: fcvtzs x8, h0
-; CHECK-FP16-NEXT: mov w9, #-1
-; CHECK-FP16-NEXT: cmp x8, x9
-; CHECK-FP16-NEXT: csel x8, x8, x9, lt
-; CHECK-FP16-NEXT: cmp x8, #0
-; CHECK-FP16-NEXT: csel x0, x8, xzr, gt
-; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: fcvtzu w0, h0
; CHECK-FP16-NEXT: ret
entry:
%conv = fptosi half %x to i64
define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
-; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT: cmgt v1.2d, v0.2d, #0
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: mov d1, v0.d[1]
+; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fcvtzu w8, d1
+; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i64>
define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtl v2.2d, v0.2s
-; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-NEXT: cmgt v4.2d, v1.2d, v0.2d
-; CHECK-NEXT: bif v2.16b, v1.16b, v3.16b
-; CHECK-NEXT: bif v0.16b, v1.16b, v4.16b
-; CHECK-NEXT: cmgt v1.2d, v2.2d, #0
-; CHECK-NEXT: cmgt v3.2d, v0.2d, #0
-; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
-; CHECK-NEXT: and v2.16b, v0.16b, v3.16b
-; CHECK-NEXT: xtn v0.2s, v1.2d
-; CHECK-NEXT: xtn2 v0.4s, v2.2d
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
}
define <4 x i32> @ustest_f16i32(<4 x half> %x) {
-; CHECK-CVT-LABEL: ustest_f16i32:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h2, v0.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s4, h0
-; CHECK-CVT-NEXT: mov h0, v0.h[3]
-; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvtzs x8, s4
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzs x9, s2
-; CHECK-CVT-NEXT: fmov d2, x8
-; CHECK-CVT-NEXT: fcvtzs x8, s3
-; CHECK-CVT-NEXT: fmov d3, x9
-; CHECK-CVT-NEXT: fcvtzs x9, s0
-; CHECK-CVT-NEXT: mov v2.d[1], x8
-; CHECK-CVT-NEXT: mov v3.d[1], x9
-; CHECK-CVT-NEXT: cmgt v0.2d, v1.2d, v2.2d
-; CHECK-CVT-NEXT: cmgt v4.2d, v1.2d, v3.2d
-; CHECK-CVT-NEXT: bsl v0.16b, v2.16b, v1.16b
-; CHECK-CVT-NEXT: bit v1.16b, v3.16b, v4.16b
-; CHECK-CVT-NEXT: cmgt v2.2d, v0.2d, #0
-; CHECK-CVT-NEXT: cmgt v3.2d, v1.2d, #0
-; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-CVT-NEXT: xtn v0.2s, v0.2d
-; CHECK-CVT-NEXT: xtn2 v0.4s, v1.2d
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: ustest_f16i32:
-; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h3, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzs x8, h0
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-NEXT: fcvtzs x9, h2
-; CHECK-FP16-NEXT: fmov d2, x8
-; CHECK-FP16-NEXT: fcvtzs x8, h3
-; CHECK-FP16-NEXT: fmov d3, x9
-; CHECK-FP16-NEXT: fcvtzs x9, h0
-; CHECK-FP16-NEXT: mov v2.d[1], x8
-; CHECK-FP16-NEXT: mov v3.d[1], x9
-; CHECK-FP16-NEXT: cmgt v0.2d, v1.2d, v2.2d
-; CHECK-FP16-NEXT: cmgt v4.2d, v1.2d, v3.2d
-; CHECK-FP16-NEXT: bsl v0.16b, v2.16b, v1.16b
-; CHECK-FP16-NEXT: bit v1.16b, v3.16b, v4.16b
-; CHECK-FP16-NEXT: cmgt v2.2d, v0.2d, #0
-; CHECK-FP16-NEXT: cmgt v3.2d, v1.2d, #0
-; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-FP16-NEXT: xtn v0.2s, v0.2d
-; CHECK-FP16-NEXT: xtn2 v0.4s, v1.2d
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: ustest_f16i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
%0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: uqxtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
}
define <8 x i16> @ustest_f16i16(<8 x half> %x) {
-; CHECK-LABEL: ustest_f16i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtl v2.4s, v0.4h
-; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: fcvtzs v2.4s, v2.4s
-; CHECK-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s
-; CHECK-NEXT: smax v2.4s, v0.4s, v3.4s
-; CHECK-NEXT: xtn v0.4h, v1.4s
-; CHECK-NEXT: xtn2 v0.8h, v2.4s
-; CHECK-NEXT: ret
+; CHECK-CVT-LABEL: ustest_f16i16:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v1.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: ustest_f16i16:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
%0 = icmp slt <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i32_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
-; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT: cmgt v1.2d, v0.2d, #0
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: mov d1, v0.d[1]
+; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fcvtzu w8, d1
+; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i64>
define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i32_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtl v2.2d, v0.2s
-; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-NEXT: cmgt v4.2d, v1.2d, v0.2d
-; CHECK-NEXT: bif v2.16b, v1.16b, v3.16b
-; CHECK-NEXT: bif v0.16b, v1.16b, v4.16b
-; CHECK-NEXT: cmgt v1.2d, v2.2d, #0
-; CHECK-NEXT: cmgt v3.2d, v0.2d, #0
-; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
-; CHECK-NEXT: and v2.16b, v0.16b, v3.16b
-; CHECK-NEXT: xtn v0.2s, v1.2d
-; CHECK-NEXT: xtn2 v0.4s, v2.2d
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
}
define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
-; CHECK-CVT-LABEL: ustest_f16i32_mm:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h2, v0.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s4, h0
-; CHECK-CVT-NEXT: mov h0, v0.h[3]
-; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvtzs x8, s4
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzs x9, s2
-; CHECK-CVT-NEXT: fmov d2, x8
-; CHECK-CVT-NEXT: fcvtzs x8, s3
-; CHECK-CVT-NEXT: fmov d3, x9
-; CHECK-CVT-NEXT: fcvtzs x9, s0
-; CHECK-CVT-NEXT: mov v2.d[1], x8
-; CHECK-CVT-NEXT: mov v3.d[1], x9
-; CHECK-CVT-NEXT: cmgt v0.2d, v1.2d, v2.2d
-; CHECK-CVT-NEXT: cmgt v4.2d, v1.2d, v3.2d
-; CHECK-CVT-NEXT: bsl v0.16b, v2.16b, v1.16b
-; CHECK-CVT-NEXT: bit v1.16b, v3.16b, v4.16b
-; CHECK-CVT-NEXT: cmgt v2.2d, v0.2d, #0
-; CHECK-CVT-NEXT: cmgt v3.2d, v1.2d, #0
-; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-CVT-NEXT: xtn v0.2s, v0.2d
-; CHECK-CVT-NEXT: xtn2 v0.4s, v1.2d
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: ustest_f16i32_mm:
-; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h3, v0.h[1]
-; CHECK-FP16-NEXT: fcvtzs x8, h0
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
-; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-NEXT: fcvtzs x9, h2
-; CHECK-FP16-NEXT: fmov d2, x8
-; CHECK-FP16-NEXT: fcvtzs x8, h3
-; CHECK-FP16-NEXT: fmov d3, x9
-; CHECK-FP16-NEXT: fcvtzs x9, h0
-; CHECK-FP16-NEXT: mov v2.d[1], x8
-; CHECK-FP16-NEXT: mov v3.d[1], x9
-; CHECK-FP16-NEXT: cmgt v0.2d, v1.2d, v2.2d
-; CHECK-FP16-NEXT: cmgt v4.2d, v1.2d, v3.2d
-; CHECK-FP16-NEXT: bsl v0.16b, v2.16b, v1.16b
-; CHECK-FP16-NEXT: bit v1.16b, v3.16b, v4.16b
-; CHECK-FP16-NEXT: cmgt v2.2d, v0.2d, #0
-; CHECK-FP16-NEXT: cmgt v3.2d, v1.2d, #0
-; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-FP16-NEXT: xtn v0.2s, v0.2d
-; CHECK-FP16-NEXT: xtn2 v0.4s, v1.2d
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: ustest_f16i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i16_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: uqxtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
}
define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
-; CHECK-LABEL: ustest_f16i16_mm:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtl v2.4s, v0.4h
-; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: fcvtzs v2.4s, v2.4s
-; CHECK-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s
-; CHECK-NEXT: smax v2.4s, v0.4s, v3.4s
-; CHECK-NEXT: xtn v0.4h, v1.4s
-; CHECK-NEXT: xtn2 v0.8h, v2.4s
-; CHECK-NEXT: ret
+; CHECK-CVT-LABEL: ustest_f16i16_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v1.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: ustest_f16i16_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
%spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
;
; FULL-LABEL: ustest_f64i32:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r7, lr}
-; FULL-NEXT: push {r7, lr}
-; FULL-NEXT: vmov r0, r1, d0
-; FULL-NEXT: bl __aeabi_d2lz
-; FULL-NEXT: subs.w r2, r0, #-1
-; FULL-NEXT: sbcs r2, r1, #0
-; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: it eq
-; FULL-NEXT: moveq.w r0, #-1
-; FULL-NEXT: csel r1, r1, r2, ne
-; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: mov.w r2, #0
-; FULL-NEXT: sbcs.w r1, r2, r1
-; FULL-NEXT: cset r1, lt
-; FULL-NEXT: cmp r1, #0
-; FULL-NEXT: csel r0, r0, r1, ne
-; FULL-NEXT: pop {r7, pc}
+; FULL-NEXT: vcvt.u32.f64 s0, d0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bx lr
entry:
%conv = fptosi double %x to i64
%0 = icmp slt i64 %conv, 4294967295
; SOFT-NEXT: mov r0, r2
; SOFT-NEXT: pop {r4, pc}
;
-; VFP2-LABEL: ustest_f32i32:
-; VFP2: @ %bb.0: @ %entry
-; VFP2-NEXT: .save {r7, lr}
-; VFP2-NEXT: push {r7, lr}
-; VFP2-NEXT: vmov r0, s0
-; VFP2-NEXT: bl __aeabi_f2lz
-; VFP2-NEXT: subs.w r3, r0, #-1
-; VFP2-NEXT: mov.w r2, #0
-; VFP2-NEXT: sbcs r3, r1, #0
-; VFP2-NEXT: mov.w r3, #0
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r3, #1
-; VFP2-NEXT: cmp r3, #0
-; VFP2-NEXT: ite ne
-; VFP2-NEXT: movne r3, r1
-; VFP2-NEXT: moveq.w r0, #-1
-; VFP2-NEXT: rsbs r1, r0, #0
-; VFP2-NEXT: sbcs.w r1, r2, r3
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r2, #1
-; VFP2-NEXT: cmp r2, #0
-; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r2
-; VFP2-NEXT: pop {r7, pc}
-;
-; FULL-LABEL: ustest_f32i32:
-; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r7, lr}
-; FULL-NEXT: push {r7, lr}
-; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: bl __aeabi_f2lz
-; FULL-NEXT: subs.w r2, r0, #-1
-; FULL-NEXT: sbcs r2, r1, #0
-; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: it eq
-; FULL-NEXT: moveq.w r0, #-1
-; FULL-NEXT: csel r1, r1, r2, ne
-; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: mov.w r2, #0
-; FULL-NEXT: sbcs.w r1, r2, r1
-; FULL-NEXT: cset r1, lt
-; FULL-NEXT: cmp r1, #0
-; FULL-NEXT: csel r0, r0, r1, ne
-; FULL-NEXT: pop {r7, pc}
+; VFP-LABEL: ustest_f32i32:
+; VFP: @ %bb.0: @ %entry
+; VFP-NEXT: vcvt.u32.f32 s0, s0
+; VFP-NEXT: vmov r0, s0
+; VFP-NEXT: bx lr
entry:
%conv = fptosi float %x to i64
%0 = icmp slt i64 %conv, 4294967295
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: bl __aeabi_f2lz
-; VFP2-NEXT: subs.w r3, r0, #-1
-; VFP2-NEXT: mov.w r2, #0
-; VFP2-NEXT: sbcs r3, r1, #0
-; VFP2-NEXT: mov.w r3, #0
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r3, #1
-; VFP2-NEXT: cmp r3, #0
-; VFP2-NEXT: ite ne
-; VFP2-NEXT: movne r3, r1
-; VFP2-NEXT: moveq.w r0, #-1
-; VFP2-NEXT: rsbs r1, r0, #0
-; VFP2-NEXT: sbcs.w r1, r2, r3
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r2, #1
-; VFP2-NEXT: cmp r2, #0
-; VFP2-NEXT: it eq
-; VFP2-NEXT: moveq r0, r2
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f16i32:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r7, lr}
-; FULL-NEXT: push {r7, lr}
-; FULL-NEXT: vmov.f16 r0, s0
-; FULL-NEXT: vmov s0, r0
-; FULL-NEXT: bl __fixhfdi
-; FULL-NEXT: subs.w r2, r0, #-1
-; FULL-NEXT: sbcs r2, r1, #0
-; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: it eq
-; FULL-NEXT: moveq.w r0, #-1
-; FULL-NEXT: csel r1, r1, r2, ne
-; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: mov.w r2, #0
-; FULL-NEXT: sbcs.w r1, r2, r1
-; FULL-NEXT: cset r1, lt
-; FULL-NEXT: cmp r1, #0
-; FULL-NEXT: csel r0, r0, r1, ne
-; FULL-NEXT: pop {r7, pc}
+; FULL-NEXT: vcvt.u32.f16 s0, s0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bx lr
entry:
%conv = fptosi half %x to i64
%0 = icmp slt i64 %conv, 4294967295
;
; FULL-LABEL: ustest_f64i32_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r7, lr}
-; FULL-NEXT: push {r7, lr}
-; FULL-NEXT: vmov r0, r1, d0
-; FULL-NEXT: bl __aeabi_d2lz
-; FULL-NEXT: mov r2, r0
-; FULL-NEXT: cmp r1, #0
-; FULL-NEXT: it pl
-; FULL-NEXT: movpl.w r2, #-1
-; FULL-NEXT: csel r0, r0, r2, eq
-; FULL-NEXT: mov.w r2, #0
-; FULL-NEXT: csel r1, r1, r2, mi
-; FULL-NEXT: cmp r1, #0
-; FULL-NEXT: csel r1, r0, r2, gt
-; FULL-NEXT: csel r0, r0, r1, eq
-; FULL-NEXT: pop {r7, pc}
+; FULL-NEXT: vcvt.u32.f64 s0, d0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bx lr
entry:
%conv = fptosi double %x to i64
%spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
; SOFT-NEXT: mov r0, r1
; SOFT-NEXT: pop {r4, pc}
;
-; VFP2-LABEL: ustest_f32i32_mm:
-; VFP2: @ %bb.0: @ %entry
-; VFP2-NEXT: .save {r7, lr}
-; VFP2-NEXT: push {r7, lr}
-; VFP2-NEXT: vmov r0, s0
-; VFP2-NEXT: bl __aeabi_f2lz
-; VFP2-NEXT: mov r2, r0
-; VFP2-NEXT: cmp r1, #0
-; VFP2-NEXT: it pl
-; VFP2-NEXT: movpl.w r2, #-1
-; VFP2-NEXT: it ne
-; VFP2-NEXT: movne r0, r2
-; VFP2-NEXT: mov.w r2, #0
-; VFP2-NEXT: it pl
-; VFP2-NEXT: movpl r1, r2
-; VFP2-NEXT: cmp r1, #0
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r2, r0
-; VFP2-NEXT: it ne
-; VFP2-NEXT: movne r0, r2
-; VFP2-NEXT: pop {r7, pc}
-;
-; FULL-LABEL: ustest_f32i32_mm:
-; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r7, lr}
-; FULL-NEXT: push {r7, lr}
-; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: bl __aeabi_f2lz
-; FULL-NEXT: mov r2, r0
-; FULL-NEXT: cmp r1, #0
-; FULL-NEXT: it pl
-; FULL-NEXT: movpl.w r2, #-1
-; FULL-NEXT: csel r0, r0, r2, eq
-; FULL-NEXT: mov.w r2, #0
-; FULL-NEXT: csel r1, r1, r2, mi
-; FULL-NEXT: cmp r1, #0
-; FULL-NEXT: csel r1, r0, r2, gt
-; FULL-NEXT: csel r0, r0, r1, eq
-; FULL-NEXT: pop {r7, pc}
+; VFP-LABEL: ustest_f32i32_mm:
+; VFP: @ %bb.0: @ %entry
+; VFP-NEXT: vcvt.u32.f32 s0, s0
+; VFP-NEXT: vmov r0, s0
+; VFP-NEXT: bx lr
entry:
%conv = fptosi float %x to i64
%spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: bl __aeabi_f2lz
-; VFP2-NEXT: mov r2, r0
-; VFP2-NEXT: cmp r1, #0
-; VFP2-NEXT: it pl
-; VFP2-NEXT: movpl.w r2, #-1
-; VFP2-NEXT: it ne
-; VFP2-NEXT: movne r0, r2
-; VFP2-NEXT: mov.w r2, #0
-; VFP2-NEXT: it pl
-; VFP2-NEXT: movpl r1, r2
-; VFP2-NEXT: cmp r1, #0
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r2, r0
-; VFP2-NEXT: it ne
-; VFP2-NEXT: movne r0, r2
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f16i32_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r7, lr}
-; FULL-NEXT: push {r7, lr}
-; FULL-NEXT: vmov.f16 r0, s0
-; FULL-NEXT: vmov s0, r0
-; FULL-NEXT: bl __fixhfdi
-; FULL-NEXT: mov r2, r0
-; FULL-NEXT: cmp r1, #0
-; FULL-NEXT: it pl
-; FULL-NEXT: movpl.w r2, #-1
-; FULL-NEXT: csel r0, r0, r2, eq
-; FULL-NEXT: mov.w r2, #0
-; FULL-NEXT: csel r1, r1, r2, mi
-; FULL-NEXT: cmp r1, #0
-; FULL-NEXT: csel r1, r0, r2, gt
-; FULL-NEXT: csel r0, r0, r1, eq
-; FULL-NEXT: pop {r7, pc}
+; FULL-NEXT: vcvt.u32.f16 s0, s0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bx lr
entry:
%conv = fptosi half %x to i64
%spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
}
define i32 @ustest_f64i32(double %x) {
-; RV32-LABEL: ustest_f64i32:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: call __fixdfdi@plt
-; RV32-NEXT: beqz a1, .LBB2_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a2, a1, 0
-; RV32-NEXT: beqz a2, .LBB2_3
-; RV32-NEXT: j .LBB2_4
-; RV32-NEXT: .LBB2_2:
-; RV32-NEXT: addi a2, a0, 1
-; RV32-NEXT: snez a2, a2
-; RV32-NEXT: bnez a2, .LBB2_4
-; RV32-NEXT: .LBB2_3: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: li a0, -1
-; RV32-NEXT: .LBB2_4: # %entry
-; RV32-NEXT: beqz a1, .LBB2_6
-; RV32-NEXT: # %bb.5: # %entry
-; RV32-NEXT: sgtz a1, a1
-; RV32-NEXT: beqz a1, .LBB2_7
-; RV32-NEXT: j .LBB2_8
-; RV32-NEXT: .LBB2_6:
-; RV32-NEXT: snez a1, a0
-; RV32-NEXT: bnez a1, .LBB2_8
-; RV32-NEXT: .LBB2_7: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: .LBB2_8: # %entry
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
+; RV32IF-LABEL: ustest_f64i32:
+; RV32IF: # %bb.0: # %entry
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: .cfi_def_cfa_offset 16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: .cfi_offset ra, -4
+; RV32IF-NEXT: call __fixdfdi@plt
+; RV32IF-NEXT: beqz a1, .LBB2_2
+; RV32IF-NEXT: # %bb.1: # %entry
+; RV32IF-NEXT: slti a2, a1, 0
+; RV32IF-NEXT: beqz a2, .LBB2_3
+; RV32IF-NEXT: j .LBB2_4
+; RV32IF-NEXT: .LBB2_2:
+; RV32IF-NEXT: addi a2, a0, 1
+; RV32IF-NEXT: snez a2, a2
+; RV32IF-NEXT: bnez a2, .LBB2_4
+; RV32IF-NEXT: .LBB2_3: # %entry
+; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: li a0, -1
+; RV32IF-NEXT: .LBB2_4: # %entry
+; RV32IF-NEXT: beqz a1, .LBB2_6
+; RV32IF-NEXT: # %bb.5: # %entry
+; RV32IF-NEXT: sgtz a1, a1
+; RV32IF-NEXT: beqz a1, .LBB2_7
+; RV32IF-NEXT: j .LBB2_8
+; RV32IF-NEXT: .LBB2_6:
+; RV32IF-NEXT: snez a1, a0
+; RV32IF-NEXT: bnez a1, .LBB2_8
+; RV32IF-NEXT: .LBB2_7: # %entry
+; RV32IF-NEXT: li a0, 0
+; RV32IF-NEXT: .LBB2_8: # %entry
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
;
; RV64IF-LABEL: ustest_f64i32:
; RV64IF: # %bb.0: # %entry
; RV64IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: ret
;
+; RV32IFD-LABEL: ustest_f64i32:
+; RV32IFD: # %bb.0: # %entry
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: .cfi_def_cfa_offset 16
+; RV32IFD-NEXT: sw a0, 8(sp)
+; RV32IFD-NEXT: sw a1, 12(sp)
+; RV32IFD-NEXT: fld ft0, 8(sp)
+; RV32IFD-NEXT: feq.d a0, ft0, ft0
+; RV32IFD-NEXT: bnez a0, .LBB2_2
+; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB2_2:
+; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
; RV64IFD-LABEL: ustest_f64i32:
; RV64IFD: # %bb.0: # %entry
; RV64IFD-NEXT: fmv.d.x ft0, a0
define i32 @ustest_f32i32(float %x) {
; RV32-LABEL: ustest_f32i32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: call __fixsfdi@plt
-; RV32-NEXT: beqz a1, .LBB5_2
+; RV32-NEXT: fmv.w.x ft0, a0
+; RV32-NEXT: feq.s a0, ft0, ft0
+; RV32-NEXT: bnez a0, .LBB5_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a2, a1, 0
-; RV32-NEXT: beqz a2, .LBB5_3
-; RV32-NEXT: j .LBB5_4
-; RV32-NEXT: .LBB5_2:
-; RV32-NEXT: addi a2, a0, 1
-; RV32-NEXT: snez a2, a2
-; RV32-NEXT: bnez a2, .LBB5_4
-; RV32-NEXT: .LBB5_3: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: li a0, -1
-; RV32-NEXT: .LBB5_4: # %entry
-; RV32-NEXT: beqz a1, .LBB5_6
-; RV32-NEXT: # %bb.5: # %entry
-; RV32-NEXT: sgtz a1, a1
-; RV32-NEXT: beqz a1, .LBB5_7
-; RV32-NEXT: j .LBB5_8
-; RV32-NEXT: .LBB5_6:
-; RV32-NEXT: snez a1, a0
-; RV32-NEXT: bnez a1, .LBB5_8
-; RV32-NEXT: .LBB5_7: # %entry
; RV32-NEXT: li a0, 0
-; RV32-NEXT: .LBB5_8: # %entry
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB5_2:
+; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
; RV32-NEXT: ret
;
; RV64-LABEL: ustest_f32i32:
}
define i32 @ustest_f64i32_mm(double %x) {
-; RV32-LABEL: ustest_f64i32_mm:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: call __fixdfdi@plt
-; RV32-NEXT: mv a2, a0
-; RV32-NEXT: bgez a1, .LBB29_7
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: bnez a1, .LBB29_8
-; RV32-NEXT: .LBB29_2: # %entry
-; RV32-NEXT: bgez a1, .LBB29_9
-; RV32-NEXT: .LBB29_3: # %entry
-; RV32-NEXT: mv a2, a0
-; RV32-NEXT: blez a1, .LBB29_10
-; RV32-NEXT: .LBB29_4: # %entry
-; RV32-NEXT: beqz a1, .LBB29_6
-; RV32-NEXT: .LBB29_5: # %entry
-; RV32-NEXT: mv a0, a2
-; RV32-NEXT: .LBB29_6: # %entry
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB29_7: # %entry
-; RV32-NEXT: li a2, -1
-; RV32-NEXT: beqz a1, .LBB29_2
-; RV32-NEXT: .LBB29_8: # %entry
-; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bltz a1, .LBB29_3
-; RV32-NEXT: .LBB29_9: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, a0
-; RV32-NEXT: bgtz a1, .LBB29_4
-; RV32-NEXT: .LBB29_10: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bnez a1, .LBB29_5
-; RV32-NEXT: j .LBB29_6
+; RV32IF-LABEL: ustest_f64i32_mm:
+; RV32IF: # %bb.0: # %entry
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: .cfi_def_cfa_offset 16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: .cfi_offset ra, -4
+; RV32IF-NEXT: call __fixdfdi@plt
+; RV32IF-NEXT: mv a2, a0
+; RV32IF-NEXT: bgez a1, .LBB29_7
+; RV32IF-NEXT: # %bb.1: # %entry
+; RV32IF-NEXT: bnez a1, .LBB29_8
+; RV32IF-NEXT: .LBB29_2: # %entry
+; RV32IF-NEXT: bgez a1, .LBB29_9
+; RV32IF-NEXT: .LBB29_3: # %entry
+; RV32IF-NEXT: mv a2, a0
+; RV32IF-NEXT: blez a1, .LBB29_10
+; RV32IF-NEXT: .LBB29_4: # %entry
+; RV32IF-NEXT: beqz a1, .LBB29_6
+; RV32IF-NEXT: .LBB29_5: # %entry
+; RV32IF-NEXT: mv a0, a2
+; RV32IF-NEXT: .LBB29_6: # %entry
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+; RV32IF-NEXT: .LBB29_7: # %entry
+; RV32IF-NEXT: li a2, -1
+; RV32IF-NEXT: beqz a1, .LBB29_2
+; RV32IF-NEXT: .LBB29_8: # %entry
+; RV32IF-NEXT: mv a0, a2
+; RV32IF-NEXT: bltz a1, .LBB29_3
+; RV32IF-NEXT: .LBB29_9: # %entry
+; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: mv a2, a0
+; RV32IF-NEXT: bgtz a1, .LBB29_4
+; RV32IF-NEXT: .LBB29_10: # %entry
+; RV32IF-NEXT: li a2, 0
+; RV32IF-NEXT: bnez a1, .LBB29_5
+; RV32IF-NEXT: j .LBB29_6
;
; RV64IF-LABEL: ustest_f64i32_mm:
; RV64IF: # %bb.0: # %entry
; RV64IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: ret
;
+; RV32IFD-LABEL: ustest_f64i32_mm:
+; RV32IFD: # %bb.0: # %entry
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: .cfi_def_cfa_offset 16
+; RV32IFD-NEXT: sw a0, 8(sp)
+; RV32IFD-NEXT: sw a1, 12(sp)
+; RV32IFD-NEXT: fld ft0, 8(sp)
+; RV32IFD-NEXT: feq.d a0, ft0, ft0
+; RV32IFD-NEXT: bnez a0, .LBB29_2
+; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB29_2:
+; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
; RV64IFD-LABEL: ustest_f64i32_mm:
; RV64IFD: # %bb.0: # %entry
; RV64IFD-NEXT: fmv.d.x ft0, a0
define i32 @ustest_f32i32_mm(float %x) {
; RV32-LABEL: ustest_f32i32_mm:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: call __fixsfdi@plt
-; RV32-NEXT: mv a2, a0
-; RV32-NEXT: bgez a1, .LBB32_7
+; RV32-NEXT: fmv.w.x ft0, a0
+; RV32-NEXT: feq.s a0, ft0, ft0
+; RV32-NEXT: bnez a0, .LBB32_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: bnez a1, .LBB32_8
-; RV32-NEXT: .LBB32_2: # %entry
-; RV32-NEXT: bgez a1, .LBB32_9
-; RV32-NEXT: .LBB32_3: # %entry
-; RV32-NEXT: mv a2, a0
-; RV32-NEXT: blez a1, .LBB32_10
-; RV32-NEXT: .LBB32_4: # %entry
-; RV32-NEXT: beqz a1, .LBB32_6
-; RV32-NEXT: .LBB32_5: # %entry
-; RV32-NEXT: mv a0, a2
-; RV32-NEXT: .LBB32_6: # %entry
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB32_2:
+; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
; RV32-NEXT: ret
-; RV32-NEXT: .LBB32_7: # %entry
-; RV32-NEXT: li a2, -1
-; RV32-NEXT: beqz a1, .LBB32_2
-; RV32-NEXT: .LBB32_8: # %entry
-; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bltz a1, .LBB32_3
-; RV32-NEXT: .LBB32_9: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, a0
-; RV32-NEXT: bgtz a1, .LBB32_4
-; RV32-NEXT: .LBB32_10: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: bnez a1, .LBB32_5
-; RV32-NEXT: j .LBB32_6
;
; RV64-LABEL: ustest_f32i32_mm:
; RV64: # %bb.0: # %entry
define arm_aapcs_vfpcc <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vmov r0, r4, d9
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs.w r2, r5, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT: sbcs r2, r6, #0
-; CHECK-NEXT: mov.w r3, #0
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: vmov.i64 q5, #0xffffffff
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT: csetm r2, ne
-; CHECK-NEXT: subs.w r0, r0, #-1
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: bfi r3, r2, #0, #8
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: movs r7, #0
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: vmov.i32 q6, #0x0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: bfi r3, r0, #8, #8
-; CHECK-NEXT: vmsr p0, r3
-; CHECK-NEXT: vpsel q0, q0, q5
-; CHECK-NEXT: vmov r0, r1, d0
-; CHECK-NEXT: vmov r2, r3, d1
-; CHECK-NEXT: rsbs r0, r0, #0
-; CHECK-NEXT: sbcs.w r0, r7, r1
-; CHECK-NEXT: mov.w r1, #0
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: bfi r1, r0, #0, #8
-; CHECK-NEXT: rsbs r0, r2, #0
-; CHECK-NEXT: sbcs.w r0, r7, r3
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: bfi r1, r0, #8, #8
-; CHECK-NEXT: vmov r0, r4, d8
-; CHECK-NEXT: vmsr p0, r1
-; CHECK-NEXT: vpsel q7, q0, q6
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs.w r2, r5, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT: sbcs r2, r6, #0
-; CHECK-NEXT: mov.w r3, #0
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csetm r2, ne
-; CHECK-NEXT: subs.w r0, r0, #-1
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: bfi r3, r2, #0, #8
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: bfi r3, r0, #8, #8
-; CHECK-NEXT: vmsr p0, r3
-; CHECK-NEXT: vpsel q0, q0, q5
-; CHECK-NEXT: vmov r0, r1, d0
-; CHECK-NEXT: vmov r2, r3, d1
-; CHECK-NEXT: rsbs r0, r0, #0
-; CHECK-NEXT: sbcs.w r0, r7, r1
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: rsbs r1, r2, #0
-; CHECK-NEXT: sbcs.w r1, r7, r3
-; CHECK-NEXT: bfi r7, r0, #0, #8
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: bfi r7, r0, #8, #8
-; CHECK-NEXT: vmsr p0, r7
-; CHECK-NEXT: vpsel q0, q0, q6
-; CHECK-NEXT: vmov.f32 s1, s2
-; CHECK-NEXT: vmov.f32 s2, s28
-; CHECK-NEXT: vmov.f32 s3, s30
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: vcvt.u32.f32 q0, q0
+; CHECK-NEXT: bx lr
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
%0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
define arm_aapcs_vfpcc <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i32_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vmov r0, r4, d9
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs.w r2, r5, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT: sbcs r2, r6, #0
-; CHECK-NEXT: mov.w r3, #0
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: vmov.i64 q5, #0xffffffff
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT: csetm r2, ne
-; CHECK-NEXT: subs.w r0, r0, #-1
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: bfi r3, r2, #0, #8
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: movs r7, #0
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: vmov.i32 q6, #0x0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: bfi r3, r0, #8, #8
-; CHECK-NEXT: vmsr p0, r3
-; CHECK-NEXT: vpsel q0, q0, q5
-; CHECK-NEXT: vmov r0, r1, d0
-; CHECK-NEXT: vmov r2, r3, d1
-; CHECK-NEXT: rsbs r0, r0, #0
-; CHECK-NEXT: sbcs.w r0, r7, r1
-; CHECK-NEXT: mov.w r1, #0
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: bfi r1, r0, #0, #8
-; CHECK-NEXT: rsbs r0, r2, #0
-; CHECK-NEXT: sbcs.w r0, r7, r3
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: bfi r1, r0, #8, #8
-; CHECK-NEXT: vmov r0, r4, d8
-; CHECK-NEXT: vmsr p0, r1
-; CHECK-NEXT: vpsel q7, q0, q6
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: bl __aeabi_f2lz
-; CHECK-NEXT: subs.w r2, r5, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT: sbcs r2, r6, #0
-; CHECK-NEXT: mov.w r3, #0
-; CHECK-NEXT: cset r2, lt
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: csetm r2, ne
-; CHECK-NEXT: subs.w r0, r0, #-1
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: bfi r3, r2, #0, #8
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: bfi r3, r0, #8, #8
-; CHECK-NEXT: vmsr p0, r3
-; CHECK-NEXT: vpsel q0, q0, q5
-; CHECK-NEXT: vmov r0, r1, d0
-; CHECK-NEXT: vmov r2, r3, d1
-; CHECK-NEXT: rsbs r0, r0, #0
-; CHECK-NEXT: sbcs.w r0, r7, r1
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: rsbs r1, r2, #0
-; CHECK-NEXT: sbcs.w r1, r7, r3
-; CHECK-NEXT: bfi r7, r0, #0, #8
-; CHECK-NEXT: cset r0, lt
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: csetm r0, ne
-; CHECK-NEXT: bfi r7, r0, #8, #8
-; CHECK-NEXT: vmsr p0, r7
-; CHECK-NEXT: vpsel q0, q0, q6
-; CHECK-NEXT: vmov.f32 s1, s2
-; CHECK-NEXT: vmov.f32 s2, s28
-; CHECK-NEXT: vmov.f32 s3, s30
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: vcvt.u32.f32 q0, q0
+; CHECK-NEXT: bx lr
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
define i32 @ustest_f64i32(double %x) {
; CHECK-LABEL: ustest_f64i32:
; CHECK: .functype ustest_f64i32 (f64) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.trunc_sat_f64_s
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: i64.gt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f64_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi double %x to i64
define i32 @ustest_f32i32(float %x) {
; CHECK-LABEL: ustest_f32i32:
; CHECK: .functype ustest_f32i32 (f32) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: i64.gt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi float %x to i64
define i32 @ustest_f16i32(half %x) {
; CHECK-LABEL: ustest_f16i32:
; CHECK: .functype ustest_f16i32 (f32) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: i64.gt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi half %x to i64
define i32 @ustest_f64i32_mm(double %x) {
; CHECK-LABEL: ustest_f64i32_mm:
; CHECK: .functype ustest_f64i32_mm (f64) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.trunc_sat_f64_s
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: i64.gt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f64_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi double %x to i64
define i32 @ustest_f32i32_mm(float %x) {
; CHECK-LABEL: ustest_f32i32_mm:
; CHECK: .functype ustest_f32i32_mm (f32) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: i64.gt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi float %x to i64
define i32 @ustest_f16i32_mm(half %x) {
; CHECK-LABEL: ustest_f16i32_mm:
; CHECK: .functype ustest_f16i32_mm (f32) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: i64.gt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi half %x to i64
define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i32:
; CHECK: .functype ustest_f32i32 (v128) -> (v128)
-; CHECK-NEXT: .local v128, v128, v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 0
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 1
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: v128.const 4294967295, 4294967295
-; CHECK-NEXT: local.tee 2
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64x2.lt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.tee 3
-; CHECK-NEXT: v128.const 0, 0
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 2
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 3
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.tee 0
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64x2.lt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.tee 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-LABEL: ustest_f16i32:
; CHECK: .functype ustest_f16i32 (f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local v128, v128, v128
; CHECK-NEXT: # %bb.0: # %entry
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: call __truncsfhf2
-; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: local.set 3
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: call __truncsfhf2
-; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: local.set 2
; CHECK-NEXT: local.get 1
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.tee 4
-; CHECK-NEXT: v128.const 4294967295, 4294967295
-; CHECK-NEXT: local.tee 5
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64x2.lt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.tee 6
-; CHECK-NEXT: v128.const 0, 0
-; CHECK-NEXT: local.tee 4
-; CHECK-NEXT: local.get 6
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 2
; CHECK-NEXT: local.get 3
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.tee 6
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: local.get 6
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64x2.lt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.tee 5
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i32_mm:
; CHECK: .functype ustest_f32i32_mm (v128) -> (v128)
-; CHECK-NEXT: .local v128, v128, v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 0
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 1
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: v128.const 4294967295, 4294967295
-; CHECK-NEXT: local.tee 2
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64x2.lt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.tee 3
-; CHECK-NEXT: v128.const 0, 0
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 2
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 3
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.tee 0
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64x2.lt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.tee 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-LABEL: ustest_f16i32_mm:
; CHECK: .functype ustest_f16i32_mm (f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local v128, v128, v128
; CHECK-NEXT: # %bb.0: # %entry
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: call __truncsfhf2
-; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: local.set 3
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: call __truncsfhf2
-; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: local.set 2
; CHECK-NEXT: local.get 1
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.tee 4
-; CHECK-NEXT: v128.const 4294967295, 4294967295
-; CHECK-NEXT: local.tee 5
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64x2.lt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.tee 6
-; CHECK-NEXT: v128.const 0, 0
-; CHECK-NEXT: local.tee 4
-; CHECK-NEXT: local.get 6
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 2
; CHECK-NEXT: local.get 3
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.tee 6
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: local.get 6
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64x2.lt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.tee 5
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64x2.gt_s
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <4 x half> %x to <4 x i64>