setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
- // TODO: Do the same for FP_TO_*INT_SAT.
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
+ setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
setTargetDAGCombine(ISD::FDIV);
// Try and combine setcc with csel
assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
"Illegal vector type after legalization");
+  // Saturating conversions may only be folded when the saturation width, the
+  // integer result element width and the FP source element width all agree.
+  // The fixed-point convert is emitted at the FP element width; a narrower
+  // result would then be produced by a plain (wrapping) narrow, which loses
+  // the clamping that FP_TO_[SU]INT_SAT requires.
+  if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
+      N->getOpcode() == ISD::FP_TO_UINT_SAT) {
+    EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+    unsigned SrcBits = N->getOperand(0).getValueType().getScalarSizeInBits();
+    if (SatVT.getScalarSizeInBits() != IntBits || IntBits != SrcBits)
+      return SDValue();
+  }
+
SDLoc DL(N);
- bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::FP_TO_SINT_SAT);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
: Intrinsic::aarch64_neon_vcvtfp2fxu;
SDValue FixConv =
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, DCI, Subtarget);
define <2 x i32> @test1_sat(<2 x float> %f) {
; CHECK-LABEL: test1_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov v1.2s, #16.00000000
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
define <4 x i32> @test2_sat(<4 x float> %f) {
; CHECK-LABEL: test2_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4s, #65, lsl #24
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #3
; CHECK-NEXT: ret
%mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
%vcvt.i = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %mul.i)
define <2 x i64> @test3_sat(<2 x double> %d) {
; CHECK-LABEL: test3_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #4629700416936869888
-; CHECK-NEXT: dup v1.2d, x8
-; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #5
; CHECK-NEXT: ret
%mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00>
%vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %mul.i)
define <2 x i32> @test4_sat(<2 x double> %d) {
; CHECK-LABEL: test4_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2d, #16.00000000
; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzs w8, d0
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzs w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %mul.i)
define <2 x i16> @test5_sat(<2 x float> %f) {
; CHECK-LABEL: test5_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2s, #16.00000000
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2s, #127, msl #8
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mvni v1.2s, #127, msl #8
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> %mul.i)
define <2 x i32> @test7_sat(<2 x float> %f) {
; CHECK-LABEL: test7_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov v1.2s, #16.00000000
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #4
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
define <2 x i32> @test11_sat(<2 x float> %f) {
; CHECK-LABEL: test11_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2s, #65, lsl #24
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #3
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
define <2 x i32> @test14_sat(<2 x float> %f) {
; CHECK-LABEL: test14_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1333788672
-; CHECK-NEXT: dup v1.2s, w8
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #32
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
%vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
define <3 x i32> @test_illegal_fp_to_int_sat_sat(<3 x float> %in) {
; CHECK-LABEL: test_illegal_fp_to_int_sat_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov v1.4s, #4.00000000
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #2
; CHECK-NEXT: ret
%mul.i = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0>
%vcvt.i = call <3 x i32> @llvm.fptosi.sat.v3i32.v3f32(<3 x float> %mul.i)
define <4 x i32> @test_extrasat(<4 x float> %f) {
; CHECK-LABEL: test_extrasat:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4s, #65, lsl #24
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff
-; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s, #3
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bic v0.4s, #255, lsl #24
; CHECK-NEXT: ret