+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s
declare half @llvm.aarch64.neon.fmulx.f16(half, half)
define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_lane_f16:
-; CHECK: dup v2.4h, v2.h[0]
+; CHECK: .Lt_vfma_lane_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: dup v2.4h, v2.h[0]
; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h
; CHECK-NEXT: ret
entry:
define dso_local <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_lane_f16:
-; CHECK: dup v2.8h, v2.h[0]
+; CHECK: .Lt_vfmaq_lane_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: dup v2.8h, v2.h[0]
; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h
; CHECK-NEXT: ret
entry:
define dso_local <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_laneq_f16:
-; CHECK: dup v2.4h, v2.h[0]
+; CHECK: .Lt_vfma_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: dup v2.4h, v2.h[0]
; CHECK-NEXT: fmla v0.4h, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
define dso_local <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_laneq_f16:
-; CHECK: dup v2.8h, v2.h[0]
+; CHECK: .Lt_vfmaq_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: dup v2.8h, v2.h[0]
; CHECK-NEXT: fmla v0.8h, v1.8h, v2.8h
; CHECK-NEXT: ret
entry:
define dso_local <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfma_n_f16:
-; CHECK: dup v2.4h, v2.h[0]
+; CHECK: .Lt_vfma_n_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
+; CHECK-NEXT: dup v2.4h, v2.h[0]
; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h
; CHECK-NEXT: ret
entry:
define dso_local <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmaq_n_f16:
-; CHECK: dup v2.8h, v2.h[0]
+; CHECK: .Lt_vfmaq_n_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
+; CHECK-NEXT: dup v2.8h, v2.h[0]
; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h
; CHECK-NEXT: ret
entry:
define dso_local half @t_vfmah_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16:
-; CHECK: fmadd h0, h1, h2, h0
+; CHECK: .Lt_vfmah_lane_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fmadd h0, h1, h2, h0
; CHECK-NEXT: ret
entry:
%extract = extractelement <4 x half> %c, i32 0
define dso_local half @t_vfmah_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16:
-; CHECK: fmadd h0, h1, h2, h0
+; CHECK: .Lt_vfmah_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: fmadd h0, h1, h2, h0
; CHECK-NEXT: ret
entry:
%extract = extractelement <8 x half> %c, i32 0
define dso_local <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_lane_f16:
-; CHECK: fneg v1.4h, v1.4h
+; CHECK: .Lt_vfms_lane_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fneg v1.4h, v1.4h
; CHECK-NEXT: dup v2.4h, v2.h[0]
; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h
; CHECK-NEXT: ret
define dso_local <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_lane_f16:
-; CHECK: fneg v1.8h, v1.8h
+; CHECK: .Lt_vfmsq_lane_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fneg v1.8h, v1.8h
; CHECK-NEXT: dup v2.8h, v2.h[0]
; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h
; CHECK-NEXT: ret
define dso_local <4 x half> @t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_laneq_f16:
-; CHECK: dup v2.4h, v2.h[0]
+; CHECK: .Lt_vfms_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: dup v2.4h, v2.h[0]
; CHECK-NEXT: fmls v0.4h, v2.4h, v1.4h
; CHECK-NEXT: ret
entry:
define dso_local <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_laneq_f16:
-; CHECK: dup v2.8h, v2.h[0]
+; CHECK: .Lt_vfmsq_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: dup v2.8h, v2.h[0]
; CHECK-NEXT: fmls v0.8h, v2.8h, v1.8h
; CHECK-NEXT: ret
entry:
define dso_local <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfms_n_f16:
-; CHECK: fneg v1.4h, v1.4h
+; CHECK: .Lt_vfms_n_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
+; CHECK-NEXT: fneg v1.4h, v1.4h
; CHECK-NEXT: dup v2.4h, v2.h[0]
; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h
; CHECK-NEXT: ret
define dso_local <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmsq_n_f16:
-; CHECK: fneg v1.8h, v1.8h
+; CHECK: .Lt_vfmsq_n_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
+; CHECK-NEXT: fneg v1.8h, v1.8h
; CHECK-NEXT: dup v2.8h, v2.h[0]
; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h
; CHECK-NEXT: ret
define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16:
-; CHECK: fmsub h0, h1, h2, h0
+; CHECK: .Lt_vfmsh_lane_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fmsub h0, h1, h2, h0
; CHECK-NEXT: ret
entry:
%0 = fsub half 0xH8000, %b
define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16:
-; CHECK: fmsub h0, h1, h2, h0
-; CHECK-NEXT: ret
+; CHECK: .Lt_vfmsh_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: fmsub h0, h1, h2, h0
+; CHECK-NEXT: ret
entry:
%0 = fsub half 0xH8000, %b
%extract = extractelement <8 x half> %c, i32 0
define dso_local <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmul_laneq_f16:
-; CHECK: fmul v0.4h, v0.4h, v1.h[0]
+; CHECK: .Lt_vmul_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: fmul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
define dso_local <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulq_laneq_f16:
-; CHECK: fmul v0.8h, v0.8h, v1.h[0]
+; CHECK: .Lt_vmulq_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: fmul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
define dso_local half @t_vmulh_lane_f16(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_lane_f16:
-; CHECK: fmul h0, h0, v1.h[0]
+; CHECK: .Lt_vmulh_lane_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: fmul h0, h0, v1.h[0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <4 x half> %c, i32 0
define dso_local half @t_vmulh_laneq_f16(half %a, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_laneq_f16:
-; CHECK: fmul h0, h0, v1.h[0]
+; CHECK: .Lt_vmulh_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: fmul h0, h0, v1.h[0]
; CHECK-NEXT: ret
entry:
%0 = extractelement <8 x half> %c, i32 0
define dso_local half @t_vmulx_f16(half %a, half %b) {
; CHECK-LABEL: t_vmulx_f16:
-; CHECK: fmulx h0, h0, h1
+; CHECK: .Lt_vmulx_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: fmulx h0, h0, h1
; CHECK-NEXT: ret
entry:
%fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
define dso_local half @t_vmulxh_lane_f16(half %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_lane_f16:
-; CHECK: fmulx h0, h0, v1.h[3]
+; CHECK: .Lt_vmulxh_lane_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: fmulx h0, h0, v1.h[3]
; CHECK-NEXT: ret
entry:
%extract = extractelement <4 x half> %b, i32 3
define dso_local <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_lane_f16:
-; CHECK: fmulx v0.4h, v0.4h, v1.h[0]
+; CHECK: .Lt_vmulx_lane_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> zeroinitializer
define dso_local <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_lane_f16:
-; CHECK: fmulx v0.8h, v0.8h, v1.h[0]
+; CHECK: .Lt_vmulxq_lane_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> zeroinitializer
define dso_local <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_laneq_f16:
-; CHECK: fmulx v0.4h, v0.4h, v1.h[0]
+; CHECK: .Lt_vmulx_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
define dso_local <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_laneq_f16:
-; CHECK: fmulx v0.8h, v0.8h, v1.h[0]
+; CHECK: .Lt_vmulxq_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
define dso_local half @t_vmulxh_laneq_f16(half %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_laneq_f16:
-; CHECK: fmulx h0, h0, v1.h[7]
+; CHECK: .Lt_vmulxh_laneq_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: fmulx h0, h0, v1.h[7]
; CHECK-NEXT: ret
entry:
%extract = extractelement <8 x half> %b, i32 7
define dso_local <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) {
; CHECK-LABEL: t_vmulx_n_f16:
-; CHECK: dup v1.4h, v1.h[0]
+; CHECK: .Lt_vmulx_n_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1
+; CHECK-NEXT: dup v1.4h, v1.h[0]
; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
entry:
define dso_local <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) {
; CHECK-LABEL: t_vmulxq_n_f16:
-; CHECK: dup v1.8h, v1.h[0]
+; CHECK: .Lt_vmulxq_n_f16$local:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1
+; CHECK-NEXT: dup v1.8h, v1.h[0]
; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
entry: