def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin)),
+ (VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fma x, (fneg y), z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin)),
+ (VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(fneg (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
+ (VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, (fneg HPR:$Sdin))),
+ (VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, (fneg HPR:$Sdin))),
+ (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(fneg (f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin))),
+ (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fneg (fma x, (fneg y), z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(fneg (f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin))),
+ (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
//===----------------------------------------------------------------------===//
// FP Conditional moves.
; CHECK-LABEL: fms1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
-; CHECK-NEXT: vldr.16 s2, [r2]
-; CHECK-NEXT: vldr.16 s4, [r0]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s2, s4, s0
-; CHECK-NEXT: vstr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s4, [r2]
+; CHECK-NEXT: vfms.f16 s4, s2, s0
+; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
-; DONT-FUSE-NEXT: vldr.16 s2, [r2]
-; DONT-FUSE-NEXT: vldr.16 s4, [r0]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s2, s4, s0
-; DONT-FUSE-NEXT: vstr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s4, [r2]
+; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms2:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr.16 s0, [r1]
-; CHECK-NEXT: vldr.16 s2, [r2]
-; CHECK-NEXT: vldr.16 s4, [r0]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s2, s0, s4
-; CHECK-NEXT: vstr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s0, [r0]
+; CHECK-NEXT: vldr.16 s2, [r1]
+; CHECK-NEXT: vldr.16 s4, [r2]
+; CHECK-NEXT: vfms.f16 s4, s2, s0
+; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms2:
; DONT-FUSE: @ %bb.0:
-; DONT-FUSE-NEXT: vldr.16 s0, [r1]
-; DONT-FUSE-NEXT: vldr.16 s2, [r2]
-; DONT-FUSE-NEXT: vldr.16 s4, [r0]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4
-; DONT-FUSE-NEXT: vstr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s0, [r0]
+; DONT-FUSE-NEXT: vldr.16 s2, [r1]
+; DONT-FUSE-NEXT: vldr.16 s4, [r2]
+; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
-; CHECK-NEXT: vfma.f16 s4, s2, s0
-; CHECK-NEXT: vneg.f16 s0, s4
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vfnma.f16 s4, s2, s0
+; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnma1:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
-; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
-; DONT-FUSE-NEXT: vneg.f16 s0, s4
-; DONT-FUSE-NEXT: vstr.16 s0, [r0]
+; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
-; CHECK-NEXT: vneg.f16 s2, s2
; CHECK-NEXT: vldr.16 s4, [r2]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s4, s2, s0
+; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
-; DONT-FUSE-NEXT: vneg.f16 s2, s2
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s4, s2, s0
+; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
; CHECK-LABEL: fnms2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
-; CHECK-NEXT: vldr.16 s2, [r2]
-; CHECK-NEXT: vldr.16 s4, [r0]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s2, s4, s0
-; CHECK-NEXT: vneg.f16 s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vldr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s4, [r2]
+; CHECK-NEXT: vfnms.f16 s4, s2, s0
+; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
-; DONT-FUSE-NEXT: vldr.16 s2, [r2]
-; DONT-FUSE-NEXT: vldr.16 s4, [r0]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s2, s4, s0
-; DONT-FUSE-NEXT: vneg.f16 s0, s2
-; DONT-FUSE-NEXT: vstr.16 s0, [r0]
+; DONT-FUSE-NEXT: vldr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s4, [r2]
+; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms3:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr.16 s0, [r0]
-; CHECK-NEXT: vldr.16 s2, [r2]
-; CHECK-NEXT: vldr.16 s4, [r1]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s2, s0, s4
-; CHECK-NEXT: vneg.f16 s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vldr.16 s0, [r1]
+; CHECK-NEXT: vldr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s4, [r2]
+; CHECK-NEXT: vfnms.f16 s4, s2, s0
+; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms3:
; DONT-FUSE: @ %bb.0:
-; DONT-FUSE-NEXT: vldr.16 s0, [r0]
-; DONT-FUSE-NEXT: vldr.16 s2, [r2]
-; DONT-FUSE-NEXT: vldr.16 s4, [r1]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4
-; DONT-FUSE-NEXT: vneg.f16 s0, s2
-; DONT-FUSE-NEXT: vstr.16 s0, [r0]
+; DONT-FUSE-NEXT: vldr.16 s0, [r1]
+; DONT-FUSE-NEXT: vldr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s4, [r2]
+; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2