From 3ee87a976d52a2379d007046f9a1ad4a07f440c0 Mon Sep 17 00:00:00 2001
From: Sanne Wouda
Date: Fri, 4 Sep 2020 16:58:02 +0100
Subject: [PATCH] Precommit test updates

---
 llvm/test/CodeGen/AArch64/faddp-half.ll     | 153 +++++++++++++++
 llvm/test/CodeGen/AArch64/faddp.ll          | 116 ++++++++++++
 llvm/test/CodeGen/AArch64/vecreduce-fadd.ll | 279 ++++++++++++++++++++++------
 3 files changed, 491 insertions(+), 57 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/faddp-half.ll
 create mode 100644 llvm/test/CodeGen/AArch64/faddp.ll

diff --git a/llvm/test/CodeGen/AArch64/faddp-half.ll b/llvm/test/CodeGen/AArch64/faddp-half.ll
new file mode 100644
index 0000000..d89205d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/faddp-half.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=aarch64 -mattr=+fullfp16 < %s | FileCheck %s
+; RUN: llc --mtriple=aarch64 < %s | FileCheck %s --check-prefix=CHECKNOFP16
+
+define half @faddp_2xhalf(<2 x half> %a) {
+; CHECK-LABEL: faddp_2xhalf:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v1.4h, v0.h[1]
+; CHECK-NEXT:    fadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: faddp_2xhalf:
+; CHECKNOFP16:       // %bb.0: // %entry
+; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
+; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
+; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
+; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECKNOFP16-NEXT:    ret
+entry:
+  %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
+  %0 = fadd <2 x half> %a, %shift
+  %1 = extractelement <2 x half> %0, i32 0
+  ret half %1
+}
+
+define half @faddp_2xhalf_commute(<2 x half> %a) {
+; CHECK-LABEL: faddp_2xhalf_commute:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v1.4h, v0.h[1]
+; CHECK-NEXT:    fadd v0.4h, v1.4h, v0.4h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: faddp_2xhalf_commute:
+; CHECKNOFP16:       // %bb.0: // %entry
+; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
+; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
+; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
+; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECKNOFP16-NEXT:    ret
+entry:
+  %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
+  %0 = fadd <2 x half> %shift, %a
+  %1 = extractelement <2 x half> %0, i32 0
+  ret half %1
+}
+
+define half @faddp_4xhalf(<4 x half> %a) {
+; CHECK-LABEL: faddp_4xhalf:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v1.4h, v0.h[1]
+; CHECK-NEXT:    fadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: faddp_4xhalf:
+; CHECKNOFP16:       // %bb.0: // %entry
+; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
+; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
+; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
+; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECKNOFP16-NEXT:    ret
+entry:
+  %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %0 = fadd <4 x half> %a, %shift
+  %1 = extractelement <4 x half> %0, i32 0
+  ret half %1
+}
+
+define half @faddp_4xhalf_commute(<4 x half> %a) {
+; CHECK-LABEL: faddp_4xhalf_commute:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v1.4h, v0.h[1]
+; CHECK-NEXT:    fadd v0.4h, v1.4h, v0.4h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: faddp_4xhalf_commute:
+; CHECKNOFP16:       // %bb.0: // %entry
+; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
+; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
+; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
+; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECKNOFP16-NEXT:    ret
+entry:
+  %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %0 = fadd <4 x half> %shift, %a
+  %1 = extractelement <4 x half> %0, i32 0
+  ret half %1
+}
+
+define half @faddp_8xhalf(<8 x half> %a) {
+; CHECK-LABEL: faddp_8xhalf:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v1.8h, v0.h[1]
+; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: faddp_8xhalf:
+; CHECKNOFP16:       // %bb.0: // %entry
+; CHECKNOFP16-NEXT:    dup v1.8h, v0.h[1]
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fadd s0, s0, s1
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    ret
+entry:
+  %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %0 = fadd <8 x half> %a, %shift
+  %1 = extractelement <8 x half> %0, i32 0
+  ret half %1
+}
+
+define half @faddp_8xhalf_commute(<8 x half> %a) {
+; CHECK-LABEL: faddp_8xhalf_commute:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v1.8h, v0.h[1]
+; CHECK-NEXT:    fadd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: faddp_8xhalf_commute:
+; CHECKNOFP16:       // %bb.0: // %entry
+; CHECKNOFP16-NEXT:    dup v1.8h, v0.h[1]
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fadd s0, s1, s0
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    ret
+entry:
+  %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %0 = fadd <8 x half> %shift, %a
+  %1 = extractelement <8 x half> %0, i32 0
+  ret half %1
+}
diff --git a/llvm/test/CodeGen/AArch64/faddp.ll b/llvm/test/CodeGen/AArch64/faddp.ll
new file mode 100644
index 0000000..299ff08
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/faddp.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple aarch64 < %s | FileCheck %s
+
+define float @faddp_2xfloat(<2 x float> %a) {
+; CHECK-LABEL: faddp_2xfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v1.2s, v0.s[1]
+; CHECK-NEXT:    fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %shift = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
+  %0 = fadd <2 x float> %a, %shift
+  %1 = extractelement <2 x float> %0, i32 0
+  ret float %1
+}
+
+define float @faddp_4xfloat(<4 x float> %a) {
+; CHECK-LABEL: faddp_4xfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v1.4s, v0.s[1]
+; CHECK-NEXT:    fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %shift = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %0 = fadd <4 x float> %a, %shift
+  %1 = extractelement <4 x float> %0, i32 0
+  ret float %1
+}
+
+define float @faddp_4xfloat_commute(<4 x float> %a) {
+; CHECK-LABEL: faddp_4xfloat_commute:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v1.4s, v0.s[1]
+; CHECK-NEXT:    fadd v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %shift = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %0 = fadd <4 x float> %shift, %a
+  %1 = extractelement <4 x float> %0, i32 0
+  ret float %1
+}
+
+define float @faddp_2xfloat_commute(<2 x float> %a) {
+; CHECK-LABEL: faddp_2xfloat_commute:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v1.2s, v0.s[1]
+; CHECK-NEXT:    fadd v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %shift = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
+  %0 = fadd <2 x float> %shift, %a
+  %1 = extractelement <2 x float> %0, i32 0
+  ret float %1
+}
+
+define double @faddp_2xdouble(<2 x double> %a) {
+; CHECK-LABEL: faddp_2xdouble:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v1.2d, v0.d[1]
+; CHECK-NEXT:    fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %shift = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
+  %0 = fadd <2 x double> %a, %shift
+  %1 = extractelement <2 x double> %0, i32 0
+  ret double %1
+}
+
+define double @faddp_2xdouble_commute(<2 x double> %a) {
+; CHECK-LABEL: faddp_2xdouble_commute:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v1.2d, v0.d[1]
+; CHECK-NEXT:    fadd v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %shift = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
+  %0 = fadd <2 x double> %shift, %a
+  %1 = extractelement <2 x double> %0, i32 0
+  ret double %1
+}
+
+define i64 @addp_2xi64(<2 x i64> %a) {
+; CHECK-LABEL: addp_2xi64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v1.2d, v0.d[1]
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %shift = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+  %0 = add <2 x i64> %a, %shift
+  %1 = extractelement <2 x i64> %0, i32 0
+  ret i64 %1
+}
+
+define i64 @addp_2xi64_commute(<2 x i64> %a) {
+; CHECK-LABEL: addp_2xi64_commute:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v1.2d, v0.d[1]
+; CHECK-NEXT:    add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %shift = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+  %0 = add <2 x i64> %shift, %a
+  %1 = extractelement <2 x i64> %0, i32 0
+  ret i64 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 3df3d2a..9552f4d 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -1,28 +1,54 @@
-; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic -asm-verbose=0 -mattr=+fullfp16 | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic -asm-verbose=0 | FileCheck %s --check-prefix=CHECKNOFP16
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=aarch64-eabi -aarch64-neon-syntax=generic -mattr=+fullfp16 < %s | FileCheck %s
+; RUN: llc --mtriple=aarch64-eabi -aarch64-neon-syntax=generic < %s | FileCheck %s --check-prefix=CHECKNOFP16
 
 define float @add_HalfS(<2 x float> %bin.rdx) {
 ; CHECK-LABEL: add_HalfS:
-; CHECK: faddp s0, v0.2s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    faddp s0, v0.2s
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: add_HalfS:
+; CHECKNOFP16:       // %bb.0:
+; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECKNOFP16-NEXT:    faddp s0, v0.2s
+; CHECKNOFP16-NEXT:    ret
   %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float 0.0, <2 x float> %bin.rdx)
   ret float %r
 }
 
 define half @add_HalfH(<4 x half> %bin.rdx) {
 ; CHECK-LABEL: add_HalfH:
-; CHECK: mov h3, v0.h[1]
-; CHECK-NEXT: mov h1, v0.h[3]
-; CHECK-NEXT: mov h2, v0.h[2]
-; CHECK-NEXT: fadd h0, h0, h3
-; CHECK-NEXT: fadd h0, h0, h2
-; CHECK-NEXT: fadd h0, h0, h1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h3, v0.h[1]
+; CHECK-NEXT:    mov h1, v0.h[3]
+; CHECK-NEXT:    mov h2, v0.h[2]
+; CHECK-NEXT:    fadd h0, h0, h3
+; CHECK-NEXT:    fadd h0, h0, h2
+; CHECK-NEXT:    fadd h0, h0, h1
+; CHECK-NEXT:    ret
+;
 ; CHECKNOFP16-LABEL: add_HalfH:
-; CHECKNOFP16-NOT: faddp
-; CHECKNOFP16-NOT: fadd h{{[0-9]+}}
-; CHECKNOFP16-NOT: fadd v{{[0-9]+}}.{{[0-9]}}h
-; CHECKNOFP16: ret
+; CHECKNOFP16:       // %bb.0:
+; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECKNOFP16-NEXT:    mov h3, v0.h[1]
+; CHECKNOFP16-NEXT:    mov h1, v0.h[3]
+; CHECKNOFP16-NEXT:    mov h2, v0.h[2]
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fcvt s3, h3
+; CHECKNOFP16-NEXT:    fadd s0, s0, s3
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    fcvt s2, h2
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fadd s0, s0, s2
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fadd s0, s0, s1
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    ret
   %r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half 0.0, <4 x half> %bin.rdx)
   ret half %r
 }
@@ -30,80 +56,219 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
 define half @add_H(<8 x half> %bin.rdx) {
 ; CHECK-LABEL: add_H:
-; CHECK: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: mov h1, v0.h[1]
-; CHECK-NEXT: mov h2, v0.h[2]
-; CHECK-NEXT: fadd h1, h0, h1
-; CHECK-NEXT: fadd h1, h1, h2
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fadd h0, h1, h0
-; CHECK-NEXT: ret
-
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    fadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    mov h2, v0.h[2]
+; CHECK-NEXT:    fadd h1, h0, h1
+; CHECK-NEXT:    fadd h1, h1, h2
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    fadd h0, h1, h0
+; CHECK-NEXT:    ret
+;
 ; CHECKNOFP16-LABEL: add_H:
-; CHECKNOFP16-NOT: faddp
-; CHECKNOFP16-NOT: fadd h{{[0-9]+}}
-; CHECKNOFP16-NOT: fadd v{{[0-9]+}}.{{[0-9]}}h
-; CHECKNOFP16: ret
+; CHECKNOFP16:       // %bb.0:
+; CHECKNOFP16-NEXT:    mov h7, v0.h[1]
+; CHECKNOFP16-NEXT:    mov h1, v0.h[7]
+; CHECKNOFP16-NEXT:    mov h2, v0.h[6]
+; CHECKNOFP16-NEXT:    mov h3, v0.h[5]
+; CHECKNOFP16-NEXT:    mov h4, v0.h[4]
+; CHECKNOFP16-NEXT:    mov h5, v0.h[3]
+; CHECKNOFP16-NEXT:    mov h6, v0.h[2]
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fcvt s7, h7
+; CHECKNOFP16-NEXT:    fadd s0, s0, s7
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    fcvt s6, h6
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fadd s0, s0, s6
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    fcvt s5, h5
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fadd s0, s0, s5
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    fcvt s4, h4
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fadd s0, s0, s4
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    fcvt s3, h3
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fadd s0, s0, s3
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    fcvt s2, h2
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fadd s0, s0, s2
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fadd s0, s0, s1
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    ret
+
   %r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half 0.0, <8 x half> %bin.rdx)
   ret half %r
 }
 
 define float @add_S(<4 x float> %bin.rdx) {
 ; CHECK-LABEL: add_S:
-; CHECK: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: faddp s0, v0.2s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    faddp s0, v0.2s
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: add_S:
+; CHECKNOFP16:       // %bb.0:
+; CHECKNOFP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECKNOFP16-NEXT:    fadd v0.2s, v0.2s, v1.2s
+; CHECKNOFP16-NEXT:    faddp s0, v0.2s
+; CHECKNOFP16-NEXT:    ret
   %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %bin.rdx)
   ret float %r
 }
 
 define double @add_D(<2 x double> %bin.rdx) {
 ; CHECK-LABEL: add_D:
-; CHECK: faddp d0, v0.2d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddp d0, v0.2d
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: add_D:
+; CHECKNOFP16:       // %bb.0:
+; CHECKNOFP16-NEXT:    faddp d0, v0.2d
+; CHECKNOFP16-NEXT:    ret
   %r = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double 0.0, <2 x double> %bin.rdx)
   ret double %r
 }
 
 define half @add_2H(<16 x half> %bin.rdx) {
 ; CHECK-LABEL: add_2H:
-; CHECK: fadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: mov h1, v0.h[1]
-; CHECK-NEXT: mov h2, v0.h[2]
-; CHECK-NEXT: fadd h1, h0, h1
-; CHECK-NEXT: fadd h1, h1, h2
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fadd h0, h1, h0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    fadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    mov h2, v0.h[2]
+; CHECK-NEXT:    fadd h1, h0, h1
+; CHECK-NEXT:    fadd h1, h1, h2
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    fadd h0, h1, h0
+; CHECK-NEXT:    ret
+;
 ; CHECKNOFP16-LABEL: add_2H:
-; CHECKNOFP16-NOT: faddp
-; CHECKNOFP16-NOT: fadd h{{[0-9]+}}
-; CHECKNOFP16-NOT: fadd v{{[0-9]+}}.{{[0-9]}}h
-; CHECKNOFP16: ret
+; CHECKNOFP16:       // %bb.0:
+; CHECKNOFP16-NEXT:    mov h2, v1.h[1]
+; CHECKNOFP16-NEXT:    mov h3, v0.h[1]
+; CHECKNOFP16-NEXT:    mov h6, v1.h[2]
+; CHECKNOFP16-NEXT:    mov h7, v0.h[2]
+; CHECKNOFP16-NEXT:    mov h16, v1.h[3]
+; CHECKNOFP16-NEXT:    mov h17, v0.h[3]
+; CHECKNOFP16-NEXT:    fcvt s4, h1
+; CHECKNOFP16-NEXT:    fcvt s5, h0
+; CHECKNOFP16-NEXT:    fcvt s2, h2
+; CHECKNOFP16-NEXT:    fcvt s3, h3
+; CHECKNOFP16-NEXT:    fcvt s6, h6
+; CHECKNOFP16-NEXT:    fcvt s7, h7
+; CHECKNOFP16-NEXT:    fcvt s16, h16
+; CHECKNOFP16-NEXT:    fcvt s17, h17
+; CHECKNOFP16-NEXT:    fadd s4, s5, s4
+; CHECKNOFP16-NEXT:    mov h5, v1.h[4]
+; CHECKNOFP16-NEXT:    fadd s2, s3, s2
+; CHECKNOFP16-NEXT:    mov h3, v0.h[4]
+; CHECKNOFP16-NEXT:    fadd s6, s7, s6
+; CHECKNOFP16-NEXT:    mov h7, v1.h[5]
+; CHECKNOFP16-NEXT:    fadd s16, s17, s16
+; CHECKNOFP16-NEXT:    mov h17, v0.h[5]
+; CHECKNOFP16-NEXT:    fcvt s5, h5
+; CHECKNOFP16-NEXT:    fcvt s3, h3
+; CHECKNOFP16-NEXT:    fcvt s7, h7
+; CHECKNOFP16-NEXT:    fcvt s17, h17
+; CHECKNOFP16-NEXT:    fadd s3, s3, s5
+; CHECKNOFP16-NEXT:    mov h5, v1.h[6]
+; CHECKNOFP16-NEXT:    fadd s7, s17, s7
+; CHECKNOFP16-NEXT:    mov h17, v0.h[6]
+; CHECKNOFP16-NEXT:    mov h1, v1.h[7]
+; CHECKNOFP16-NEXT:    mov h0, v0.h[7]
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fadd s0, s0, s1
+; CHECKNOFP16-NEXT:    fcvt h1, s4
+; CHECKNOFP16-NEXT:    fcvt h2, s2
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fcvt s2, h2
+; CHECKNOFP16-NEXT:    fadd s1, s1, s2
+; CHECKNOFP16-NEXT:    fcvt h2, s6
+; CHECKNOFP16-NEXT:    fcvt h1, s1
+; CHECKNOFP16-NEXT:    fcvt s2, h2
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fadd s1, s1, s2
+; CHECKNOFP16-NEXT:    fcvt h2, s16
+; CHECKNOFP16-NEXT:    fcvt h1, s1
+; CHECKNOFP16-NEXT:    fcvt s2, h2
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fadd s1, s1, s2
+; CHECKNOFP16-NEXT:    fcvt h2, s3
+; CHECKNOFP16-NEXT:    fcvt h1, s1
+; CHECKNOFP16-NEXT:    fcvt s2, h2
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fadd s1, s1, s2
+; CHECKNOFP16-NEXT:    fcvt h3, s7
+; CHECKNOFP16-NEXT:    fcvt h1, s1
+; CHECKNOFP16-NEXT:    fcvt s5, h5
+; CHECKNOFP16-NEXT:    fcvt s17, h17
+; CHECKNOFP16-NEXT:    fcvt s3, h3
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fadd s5, s17, s5
+; CHECKNOFP16-NEXT:    fadd s1, s1, s3
+; CHECKNOFP16-NEXT:    fcvt h4, s5
+; CHECKNOFP16-NEXT:    fcvt h1, s1
+; CHECKNOFP16-NEXT:    fcvt s4, h4
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fadd s1, s1, s4
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    fcvt h1, s1
+; CHECKNOFP16-NEXT:    fcvt s1, h1
+; CHECKNOFP16-NEXT:    fcvt s0, h0
+; CHECKNOFP16-NEXT:    fadd s0, s1, s0
+; CHECKNOFP16-NEXT:    fcvt h0, s0
+; CHECKNOFP16-NEXT:    ret
 
   %r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half 0.0, <16 x half> %bin.rdx)
   ret half %r
 }
 
 define float @add_2S(<8 x float> %bin.rdx) {
 ; CHECK-LABEL: add_2S:
-; CHECK: fadd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: faddp s0, v0.2s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    faddp s0, v0.2s
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: add_2S:
+; CHECKNOFP16:       // %bb.0:
+; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
+; CHECKNOFP16-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECKNOFP16-NEXT:    fadd v0.2s, v0.2s, v1.2s
+; CHECKNOFP16-NEXT:    faddp s0, v0.2s
+; CHECKNOFP16-NEXT:    ret
   %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %bin.rdx)
   ret float %r
 }
 
 define double @add_2D(<4 x double> %bin.rdx) {
 ; CHECK-LABEL: add_2D:
-; CHECK: fadd v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: faddp d0, v0.2d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    faddp d0, v0.2d
+; CHECK-NEXT:    ret
+;
+; CHECKNOFP16-LABEL: add_2D:
+; CHECKNOFP16:       // %bb.0:
+; CHECKNOFP16-NEXT:    fadd v0.2d, v0.2d, v1.2d
+; CHECKNOFP16-NEXT:    faddp d0, v0.2d
+; CHECKNOFP16-NEXT:    ret
   %r = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.0, <4 x double> %bin.rdx)
   ret double %r
 }
-- 
2.7.4