From 034a27e6882f0fd63eb0f128b0b124cd0c3a7301 Mon Sep 17 00:00:00 2001 From: Rosie Sumpter Date: Wed, 20 Jul 2022 13:53:59 +0100 Subject: [PATCH] [AArch64] Add f16 fpimm patterns This patch recognizes f16 immediates as legal and adds the necessary patterns. This allows the fadda folding introduced in 05d424d16563 to be applied to the f16 cases. Differential Revision: https://reviews.llvm.org/D129989 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 ++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 ++ llvm/test/CodeGen/AArch64/f16-imm.ll | 54 ++++++---- llvm/test/CodeGen/AArch64/isinf.ll | 6 +- llvm/test/CodeGen/AArch64/sve-fadda-select.ll | 23 +---- llvm/test/CodeGen/AArch64/sve-fp-reduce.ll | 32 +++--- llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll | 132 +++++++++++------------- llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll | 111 +++++++++----------- 9 files changed, 182 insertions(+), 192 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e070ce2..8a80875 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -974,6 +974,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(Op, VT, Custom); if (Subtarget->hasFullFP16()) { + setOperationAction(ISD::ConstantFP, MVT::f16, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index a7b7e52..926e7305 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4052,6 +4052,12 @@ def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; +// Pattern for FP16 immediates +let Predicates = [HasFullFP16] in { + def : Pat<(f16 fpimm:$in), + (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>; +} + //===----------------------------------------------------------------------===// // Floating point conversion instruction. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 0436404..686ffe9 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -287,6 +287,8 @@ def AArch64fadda_p_node : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWith def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3), [(AArch64fadda_p_node node:$op1, node:$op2, node:$op3), (AArch64fadda_p_node (SVEAllActive), node:$op2, + (vselect node:$op1, node:$op3, (splat_vector (f16 fpimm_minus0)))), + (AArch64fadda_p_node (SVEAllActive), node:$op2, (vselect node:$op1, node:$op3, (splat_vector (f32 fpimm_minus0)))), (AArch64fadda_p_node (SVEAllActive), node:$op2, (vselect node:$op1, node:$op3, (splat_vector (f64 fpimm_minus0))))]>; @@ -718,6 +720,12 @@ let Predicates = [HasSVEorSME] in { (DUP_ZI_D $a, $b)>; // Duplicate immediate FP into all vector elements. + def : Pat<(nxv2f16 (splat_vector (f16 fpimm:$val))), + (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; + def : Pat<(nxv4f16 (splat_vector (f16 fpimm:$val))), + (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; + def : Pat<(nxv8f16 (splat_vector (f16 fpimm:$val))), + (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; def : Pat<(nxv2f32 (splat_vector (f32 fpimm:$val))), (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; def : Pat<(nxv4f32 (splat_vector (f32 fpimm:$val))), diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll index f6a6347..7f31465 100644 --- a/llvm/test/CodeGen/AArch64/f16-imm.ll +++ b/llvm/test/CodeGen/AArch64/f16-imm.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+no-zcz-fp | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-NOZCZ -; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-ZCZ -; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+no-zcz-fp | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-NOZCZ +; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-ZCZ +; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK-NOFP16 define half @Const0() { ; CHECK-NOZCZ-LABEL: Const0: @@ -84,31 +84,49 @@ entry: } define half @Const5() { -; CHECK-LABEL: Const5: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: ldr h0, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: ret +; CHECK-FP16-LABEL: Const5: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: mov w8, #12272 +; CHECK-FP16-NEXT: fmov h0, w8 +; CHECK-FP16-NEXT: ret +; +; CHECK-NOFP16-LABEL: Const5: +; CHECK-NOFP16: // %bb.0: // %entry +; CHECK-NOFP16-NEXT: adrp x8, .LCPI5_0 +; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI5_0] +; CHECK-NOFP16-NEXT: ret entry: ret half 0xH2FF0 } define half @Const6() { -; CHECK-LABEL: Const6: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: ldr h0, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: ret +; CHECK-FP16-LABEL: Const6: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: mov w8, #20417 +; CHECK-FP16-NEXT: fmov h0, w8 +; CHECK-FP16-NEXT: ret +; +; CHECK-NOFP16-LABEL: Const6: +; CHECK-NOFP16: // %bb.0: // %entry +; CHECK-NOFP16-NEXT: adrp x8, .LCPI6_0 +; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI6_0] +; CHECK-NOFP16-NEXT: ret entry: ret half 0xH4FC1 } define half @Const7() { -; CHECK-LABEL: Const7: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI7_0 -; CHECK-NEXT: ldr h0, [x8, :lo12:.LCPI7_0] -; CHECK-NEXT: ret +; CHECK-FP16-LABEL: Const7: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: mov w8, #20480 +; CHECK-FP16-NEXT: fmov h0, w8 +; CHECK-FP16-NEXT: ret +; +; CHECK-NOFP16-LABEL: Const7: +; CHECK-NOFP16: // %bb.0: // %entry +; CHECK-NOFP16-NEXT: adrp x8, .LCPI7_0 +; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI7_0] +; CHECK-NOFP16-NEXT: ret entry: ret half 0xH5000 } diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll index 8741b23..947e73cb 100644 --- a/llvm/test/CodeGen/AArch64/isinf.ll +++ b/llvm/test/CodeGen/AArch64/isinf.ll @@ -6,13 +6,13 @@ declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) declare fp128 @llvm.fabs.f128(fp128) -; INFINITY requires loading the constant for _Float16 +; Check if INFINITY for _Float16 is materialized define i32 @replace_isinf_call_f16(half %x) { ; CHECK-LABEL: replace_isinf_call_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: mov w8, #31744 ; CHECK-NEXT: fabs h0, h0 -; CHECK-NEXT: ldr h1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: fmov h1, w8 ; CHECK-NEXT: fcmp h0, h1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fadda-select.ll b/llvm/test/CodeGen/AArch64/sve-fadda-select.ll index e029840..b4d6aa5 100644 --- a/llvm/test/CodeGen/AArch64/sve-fadda-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-fadda-select.ll @@ -45,18 +45,11 @@ define double @pred_fadda_nxv2f64(double %x, %y, %y, %mask) { ; CHECK-LABEL: pred_fadda_nxv2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI3_0 -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 -; CHECK-NEXT: ld1rh { z2.d }, p1/z, [x8] -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: fadda h0, p1, h0, z1.h +; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret %i = insertelement poison, half -0.000000e+00, i32 0 @@ -69,13 +62,8 @@ define half @pred_fadda_nxv2f16(half %x, %y, %y, %mask) { ; CHECK-LABEL: pred_fadda_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI4_0 -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 -; CHECK-NEXT: ld1rh { z2.s }, p1/z, [x8] -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: fadda h0, p1, h0, z1.h +; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret %i = insertelement poison, half -0.000000e+00, i32 0 @@ -88,13 +76,8 @@ define half @pred_fadda_nxv4f16(half %x, %y, %y, %mask) { ; CHECK-LABEL: pred_fadda_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI5_0 -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 -; CHECK-NEXT: ld1rh { z2.h }, p1/z, [x8] -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: fadda h0, p1, h0, z1.h +; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret %i = insertelement poison, half -0.000000e+00, i32 0 diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll index d525eee..6c41f62 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll @@ -47,14 +47,13 @@ define half @fadda_nxv6f16( %v, half %s) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI3_0 +; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: ld1rh { z0.d }, p1/z, [x8] -; CHECK-NEXT: st1h { z0.d }, p1, [sp, #3, mul vl] ; CHECK-NEXT: fmov s0, s1 +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: st1h { z2.d }, p1, [sp, #3, mul vl] ; CHECK-NEXT: ld1h { z2.h }, p0/z, [sp] ; CHECK-NEXT: fadda h0, p0, h0, z2.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -73,22 +72,21 @@ define half @fadda_nxv10f16( %v, half %s) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI4_0 +; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z1.h }, p0, [sp] ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: ld1rh { z1.d }, p1/z, [x8] +; CHECK-NEXT: mov z3.h, w8 ; CHECK-NEXT: addvl x8, sp, #1 +; CHECK-NEXT: st1h { z3.d }, p1, [sp, #1, mul vl] ; CHECK-NEXT: fadda h2, p0, h2, z0.h -; CHECK-NEXT: st1h { z1.d }, p1, [sp, #1, mul vl] -; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp] -; CHECK-NEXT: st1h { z3.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1h { z1.d }, p1, [sp, #6, mul vl] -; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp, #1, mul vl] -; CHECK-NEXT: st1h { z3.h }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1h { z1.d }, p1, [x8, #7, mul vl] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp] +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z3.d }, p1, [sp, #6, mul vl] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1h { z3.d }, p1, [x8, #7, mul vl] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #2, mul vl] ; CHECK-NEXT: fadda h2, p0, h2, z1.h ; CHECK-NEXT: fmov s0, s2 @@ -102,14 +100,12 @@ define half @fadda_nxv10f16( %v, half %s) { define half @fadda_nxv12f16( %v, half %s) { ; CHECK-LABEL: fadda_nxv12f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI5_0 -; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: ld1rh { z3.s }, p0/z, [x8] ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fadda h2, p0, h2, z0.h +; CHECK-NEXT: mov z3.h, w8 ; CHECK-NEXT: uzp1 z1.h, z1.h, z3.h ; CHECK-NEXT: fadda h2, p0, h2, z1.h ; CHECK-NEXT: fmov s0, s2 diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll index 2bba40d..b91da38 100644 --- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll @@ -508,19 +508,17 @@ declare @llvm.fptosi.sat.nxv4f16.nxv4i64() define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_0 +; CHECK-NEXT: mov w8, #64511 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z3.d, #0xffffffff80000000 -; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] -; CHECK-NEXT: adrp x8, .LCPI14_1 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_1 -; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] +; CHECK-NEXT: mov z2.d, #0xffffffff80000000 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, p1/m, z3.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z2.d, #0x7fffffff ; CHECK-NEXT: mov z1.d, p1/m, z2.d @@ -535,23 +533,21 @@ define @test_signed_v2f16_v2i32( %f) { define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI15_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI15_0 +; CHECK-NEXT: mov w8, #64511 +; CHECK-NEXT: mov w9, #-2147483648 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: adrp x9, .LCPI15_1 -; CHECK-NEXT: add x9, x9, :lo12:.LCPI15_1 -; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x9] -; CHECK-NEXT: mov z3.s, w8 -; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 +; CHECK-NEXT: mov z2.s, w9 +; CHECK-NEXT: mov w9, #2147483647 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.s, p1/m, z3.s -; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov z2.s, w9 ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h ; CHECK-NEXT: mov z1.s, p1/m, z2.s ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 @@ -564,31 +560,29 @@ define @test_signed_v4f16_v4i32( %f) { define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI16_0 +; CHECK-NEXT: mov w8, #64511 +; CHECK-NEXT: mov w9, #-2147483648 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: adrp x9, .LCPI16_1 -; CHECK-NEXT: add x9, x9, :lo12:.LCPI16_1 ; CHECK-NEXT: uunpklo z2.s, z0.h -; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] -; CHECK-NEXT: mov w8, #-2147483648 ; CHECK-NEXT: uunpkhi z6.s, z0.h -; CHECK-NEXT: ld1rh { z3.s }, p0/z, [x9] ; CHECK-NEXT: movprfx z4, z2 ; CHECK-NEXT: fcvtzs z4.s, p0/m, z2.h -; CHECK-NEXT: mov z5.s, w8 -; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 +; CHECK-NEXT: mov z3.s, w9 +; CHECK-NEXT: mov w9, #2147483647 ; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h ; CHECK-NEXT: fcmge p2.h, p0/z, z6.h, z1.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z5.h, w8 ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z4.s, p1/m, z5.s -; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z3.h -; CHECK-NEXT: mov z7.s, w8 +; CHECK-NEXT: mov z4.s, p1/m, z3.s +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z5.h +; CHECK-NEXT: mov z7.s, w9 ; CHECK-NEXT: movprfx z0, z6 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z6.h -; CHECK-NEXT: sel z1.s, p2, z5.s, z0.s -; CHECK-NEXT: fcmgt p2.h, p0/z, z6.h, z3.h +; CHECK-NEXT: sel z1.s, p2, z3.s, z0.s +; CHECK-NEXT: fcmgt p2.h, p0/z, z6.h, z5.h ; CHECK-NEXT: sel z0.s, p1, z7.s, z4.s ; CHECK-NEXT: mov z1.s, p2/m, z7.s ; CHECK-NEXT: fcmuo p1.h, p0/z, z2.h, z2.h @@ -603,23 +597,21 @@ define @test_signed_v8f16_v8i32( %f) { define @test_signed_v4f16_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_0 +; CHECK-NEXT: mov w8, #63488 +; CHECK-NEXT: mov w9, #30719 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] -; CHECK-NEXT: adrp x8, .LCPI17_1 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_1 -; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8] +; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.h, w9 ; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: mov z1.s, p1/m, z2.s ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.s, p1/m, z2.s ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -630,23 +622,21 @@ define @test_signed_v4f16_v4i16( %f) { define @test_signed_v8f16_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_0 +; CHECK-NEXT: mov w8, #63488 +; CHECK-NEXT: mov w9, #30719 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: ld1rh { z1.h }, p0/z, [x8] -; CHECK-NEXT: adrp x8, .LCPI18_1 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_1 -; CHECK-NEXT: ld1rh { z2.h }, p0/z, [x8] +; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.h, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.h, w9 ; CHECK-NEXT: mov z1.h, p1/m, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov z1.h, p1/m, z2.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.h, p1/m, z2.h ; CHECK-NEXT: mov z1.h, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -657,19 +647,17 @@ define @test_signed_v8f16_v8i16( %f) { define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_0 +; CHECK-NEXT: mov w8, #64511 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z3.d, #0x8000000000000000 -; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] -; CHECK-NEXT: adrp x8, .LCPI19_1 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_1 -; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, p1/m, z3.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: mov z1.d, p1/m, z2.d @@ -684,32 +672,30 @@ define @test_signed_v2f16_v2i64( %f) { define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI20_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0 +; CHECK-NEXT: mov w8, #64511 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z4.d, z0.s -; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: mov w9, #31743 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z5.d, z0.s -; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] -; CHECK-NEXT: adrp x8, .LCPI20_1 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_1 -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff -; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] -; CHECK-NEXT: fcmge p1.h, p0/z, z4.h, z1.h -; CHECK-NEXT: movprfx z0, z4 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: movprfx z0, z3 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.h +; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, z1.h +; CHECK-NEXT: mov z4.h, w9 ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z0.d, p1/m, z3.d +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z4.h +; CHECK-NEXT: mov z0.d, p1/m, z2.d ; CHECK-NEXT: fcmge p1.h, p0/z, z5.h, z1.h ; CHECK-NEXT: movprfx z1, z5 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.h, p0/z, z4.h, z2.h -; CHECK-NEXT: mov z1.d, p1/m, z3.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z2.h +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z4.h ; CHECK-NEXT: mov z0.d, p2/m, z6.d ; CHECK-NEXT: mov z1.d, p1/m, z6.d -; CHECK-NEXT: fcmuo p1.h, p0/z, z4.h, z4.h +; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h ; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll index fe68fe3..b321c18 100644 --- a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll @@ -400,18 +400,16 @@ declare @llvm.fptoui.sat.nxv4f16.nxv4i64() define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI14_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: ld1rh { z1.d }, p0/z, [x8] -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z0.d, #0xffffffff -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0xffffffff +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p2/m, z1.d ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f16.nxv2i32( %f) ret %x @@ -420,14 +418,13 @@ define @test_signed_v2f16_v2i32( %f) { define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI15_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI15_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8] ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff @@ -440,24 +437,23 @@ define @test_signed_v4f16_v4i32( %f) { define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI16_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpklo z3.s, z0.h -; CHECK-NEXT: uunpkhi z4.s, z0.h -; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, #0.0 -; CHECK-NEXT: ld1rh { z2.s }, p0/z, [x8] -; CHECK-NEXT: fcmge p2.h, p0/z, z4.h, #0.0 -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvtzu z0.s, p0/m, z3.h -; CHECK-NEXT: movprfx z1, z4 -; CHECK-NEXT: fcvtzu z1.s, p0/m, z4.h -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, #0.0 +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.h +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0 +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: movprfx z1, z3 +; CHECK-NEXT: fcvtzu z1.s, p0/m, z3.h ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h ; CHECK-NEXT: mov z1.s, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z3.h, z2.h -; CHECK-NEXT: fcmgt p0.h, p0/z, z4.h, z2.h ; CHECK-NEXT: mov z0.s, p1/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ret @@ -468,19 +464,17 @@ define @test_signed_v8f16_v8i32( %f) { define @test_signed_v4f16_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI17_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w9, #65535 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: ld1rh { z1.s }, p0/z, [x8] -; CHECK-NEXT: mov w8, #65535 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z0.s, w8 -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: mov z1.s, w9 +; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.s, p2/m, z1.s ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f16.nxv4i16( %f) ret %x @@ -489,14 +483,13 @@ define @test_signed_v4f16_v4i16( %f) { define @test_signed_v8f16_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI18_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: ld1rh { z2.h }, p0/z, [x8] ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.h, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z1.h, p0/m, #-1 // =0xffffffffffffffff @@ -509,14 +502,13 @@ define @test_signed_v8f16_v8i16( %f) { define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI19_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff @@ -529,24 +521,23 @@ define @test_signed_v2f16_v2i64( %f) { define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI20_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0 +; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z3.d, z0.s -; CHECK-NEXT: uunpkhi z4.d, z0.s -; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, #0.0 -; CHECK-NEXT: ld1rh { z2.d }, p0/z, [x8] -; CHECK-NEXT: fcmge p2.h, p0/z, z4.h, #0.0 -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvtzu z0.d, p0/m, z3.h -; CHECK-NEXT: movprfx z1, z4 -; CHECK-NEXT: fcvtzu z1.d, p0/m, z4.h -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpkhi z3.d, z0.s +; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, #0.0 +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.h +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0 +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: movprfx z1, z3 +; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.h ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z3.h, z2.h -; CHECK-NEXT: fcmgt p0.h, p0/z, z4.h, z2.h ; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ret -- 2.7.4