defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
+
+// Selection patterns for vecreduce_fadd with an f16 result. They are only
+// active when HasFullFP16 holds.
+let Predicates = [HasFullFP16] in {
+// v8f16 source: two nested FADDPv8f16 on the 128-bit value, then the dsub
+// subregister of that result is fed to the scalar pairwise FADDPv2i16p.
+// The second source operand of each vector FADDP is IMPLICIT_DEF.
+// NOTE(review): presumably the lanes computed from the IMPLICIT_DEF operand
+// are never read by the later steps -- confirm against FADDP (vector) semantics.
+def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
+ (FADDPv2i16p
+ (EXTRACT_SUBREG
+ (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))),
+ dsub))>;
+// v4f16 source: a single FADDPv4f16 (second operand IMPLICIT_DEF) followed by
+// the scalar pairwise FADDPv2i16p; no subregister extraction is used here.
+def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
+ (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>;
+}
+// Selection patterns for vecreduce_fadd with f32/f64 results. Unlike the f16
+// patterns, these are not wrapped in a Predicates block.
+// v4f32 source: one FADDPv4f32 (second operand IMPLICIT_DEF), then the dsub
+// subregister of the result goes to the scalar pairwise FADDPv2i32p.
+def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
+ (FADDPv2i32p
+ (EXTRACT_SUBREG
+ (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))),
+ dsub))>;
+// v2f32 source: the scalar pairwise FADDPv2i32p is applied directly.
+def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
+ (FADDPv2i32p V64:$Rn)>;
+// v2f64 source: the scalar pairwise FADDPv2i64p is applied directly.
+def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
+ (FADDPv2i64p V128:$Rn)>;
+
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
; CHECK-NEXT: mov w8, #-2147483648
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
%b = call reassoc float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a)
; CHECK-NEXT: mov v0.s[3], v3.s[0]
; CHECK-NEXT: mov v5.s[0], v4.s[0]
; CHECK-NEXT: fadd v0.4s, v0.4s, v5.4s
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
%b = call reassoc float @llvm.vector.reduce.fadd.f32.v5f32(float -0.0, <5 x float> %a)
; CHECK-NEXT: fadd v1.4s, v1.4s, v3.4s
; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
%b = call reassoc float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a)
define float @add_HalfS(<2 x float> %bin.rdx) {
; CHECK-LABEL: add_HalfS:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: add_HalfS:
; CHECKNOFP16: // %bb.0:
-; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT: faddp s0, v0.2s
; CHECKNOFP16-NEXT: ret
%r = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %bin.rdx)
define half @add_HalfH(<4 x half> %bin.rdx) {
; CHECK-LABEL: add_HalfH:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[3]
-; CHECK-NEXT: mov h2, v0.h[2]
+; CHECK-NEXT: faddp v0.4h, v0.4h, v0.4h
; CHECK-NEXT: faddp h0, v0.2h
-; CHECK-NEXT: fadd h0, h0, h2
-; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: add_HalfH:
define half @add_H(<8 x half> %bin.rdx) {
; CHECK-LABEL: add_H:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: mov h1, v0.h[2]
-; CHECK-NEXT: faddp h2, v0.2h
-; CHECK-NEXT: fadd h1, h2, h1
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fadd h0, h1, h0
+; CHECK-NEXT: faddp v0.8h, v0.8h, v0.8h
+; CHECK-NEXT: faddp v0.8h, v0.8h, v0.8h
+; CHECK-NEXT: faddp h0, v0.2h
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: add_H:
define float @add_S(<4 x float> %bin.rdx) {
; CHECK-LABEL: add_S:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: add_S:
; CHECKNOFP16: // %bb.0:
-; CHECKNOFP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECKNOFP16-NEXT: fadd v0.2s, v0.2s, v1.2s
+; CHECKNOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
; CHECKNOFP16-NEXT: faddp s0, v0.2s
; CHECKNOFP16-NEXT: ret
%r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %bin.rdx)
; CHECK-LABEL: add_2H:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: mov h1, v0.h[2]
-; CHECK-NEXT: faddp h2, v0.2h
-; CHECK-NEXT: fadd h1, h2, h1
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fadd h0, h1, h0
+; CHECK-NEXT: faddp v0.8h, v0.8h, v0.8h
+; CHECK-NEXT: faddp v0.8h, v0.8h, v0.8h
+; CHECK-NEXT: faddp h0, v0.2h
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: add_2H:
; CHECK-LABEL: add_2S:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: add_2S:
; CHECKNOFP16: // %bb.0:
; CHECKNOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECKNOFP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECKNOFP16-NEXT: fadd v0.2s, v0.2s, v1.2s
+; CHECKNOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
; CHECKNOFP16-NEXT: faddp s0, v0.2s
; CHECKNOFP16-NEXT: ret
%r = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float -0.0, <8 x float> %bin.rdx)
ret double %r
}
+; Test a v4f32 fadd reduction whose start value is 42.0 rather than -0.0.
+; The expected code materialises the start value in a scalar register and
+; adds it with a separate scalar fadd after the faddp reduction steps.
+define float @add_S_init_42(<4 x float> %bin.rdx) {
+; CHECK-LABEL: add_S_init_42:
+; CHECK: // %bb.0:
+; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: mov w8, #1109917696
+; CHECK-NEXT: faddp s0, v0.2s
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
+;
+; CHECKNOFP16-LABEL: add_S_init_42:
+; CHECKNOFP16: // %bb.0:
+; CHECKNOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
+; CHECKNOFP16-NEXT: mov w8, #1109917696
+; CHECKNOFP16-NEXT: faddp s0, v0.2s
+; CHECKNOFP16-NEXT: fmov s1, w8
+; CHECKNOFP16-NEXT: fadd s0, s0, s1
+; CHECKNOFP16-NEXT: ret
+ %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 42.0, <4 x float> %bin.rdx)
+ ret float %r
+}
+
; Function Attrs: nounwind readnone
declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
declare half @llvm.vector.reduce.fadd.f16.v8f16(half, <8 x half>)