From: Sjoerd Meijer Date: Tue, 6 Apr 2021 07:53:42 +0000 (+0100) Subject: [AArch64] Default to zero-cycle-zeroing FP registers X-Git-Tag: llvmorg-14-init~10395 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d5f1131c812df57560c7563475cb0d674a101636;p=platform%2Fupstream%2Fllvm.git [AArch64] Default to zero-cycle-zeroing FP registers It is generally beneficial to prefer "movi d0, #0" over "fmov s0, wzr" as this is most efficient across all cores; it is recognised as a zeroing idiom. For newer cores, fmov instructions can also be eliminated early and there is no difference with movi, but some implementations lack this, so this is not true for other/older cores. Thus this standardises on using movi as this should always give the same or better performance than the fmov with wzr. Differential Revision: https://reviews.llvm.org/D99586 --- diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index bdf2e51..133a6b1 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -147,12 +147,12 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", "Has zero-cycle zeroing instructions for generic registers">; -def FeatureZCZeroingFP : SubtargetFeature<"zcz-fp", "HasZeroCycleZeroingFP", "true", - "Has zero-cycle zeroing instructions for FP registers">; +def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false", + "Has no zero-cycle zeroing instructions for FP registers">; def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions", - [FeatureZCZeroingGP, FeatureZCZeroingFP]>; + [FeatureZCZeroingGP]>; /// ... but the floating-point version doesn't quite work in rare cases on older /// CPUs. 
@@ -915,8 +915,7 @@ def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", FeatureLSLFast, FeaturePerfMon, FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - FeatureZCZeroingFP]>; + FeaturePredictableSelectIsExpensive]>; def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", "Samsung Exynos-M4 processors", diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 6447103..ce5a012 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -196,9 +196,14 @@ protected: // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. bool HasZeroCycleZeroing = false; bool HasZeroCycleZeroingGP = false; - bool HasZeroCycleZeroingFP = false; bool HasZeroCycleZeroingFPWorkaround = false; + // It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0" + // as movi is more efficient across all cores. Newer cores can eliminate + // fmovs early and there is no difference with movi, but this is not true for + // all implementations. + bool HasZeroCycleZeroingFP = true; + // StrictAlign - Disallow unaligned memory accesses. 
bool StrictAlign = false; diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll index 8703b2e..a09aae2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll @@ -15,7 +15,7 @@ define float @fmov_float2() { ; CHECK-LABEL: fmov_float2 ; CHECK: fmov s0, wzr ; GISEL-LABEL: fmov_float2 -; GISEL: fmov s0, wzr +; GISEL: movi d0, #0000000000000000 ret float 0.0e+00 } @@ -31,7 +31,7 @@ define double @fmov_double2() { ; CHECK-LABEL: fmov_double2 ; CHECK: fmov d0, xzr ; GISEL-LABEL: fmov_double2 -; GISEL: fmov d0, xzr +; GISEL: movi d0, #0000000000000000 ret double 0.0e+00 } diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll b/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll index 70548ca..9a75374 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=arm64 -fp-contract=fast -o - %s | FileCheck %s @@ -5,8 +6,11 @@ ; -0.0. It's also good, though not essential, that we don't resort to a litpool. 
define double @test_fms_fold(double %a, double %b) { ; CHECK-LABEL: test_fms_fold: -; CHECK: fmov {{d[0-9]+}}, xzr -; CHECK: ret +; CHECK: // %bb.0: +; CHECK-NEXT: movi d2, #0000000000000000 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fnmsub d0, d0, d2, d1 +; CHECK-NEXT: ret %mul = fmul double %a, 0.000000e+00 %mul1 = fmul double %b, 0.000000e+00 %sub = fsub double %mul, %mul1 diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll index 5f76f0a..cee47d7 100644 --- a/llvm/test/CodeGen/AArch64/arm64-rev.ll +++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll @@ -561,7 +561,7 @@ define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest ; ; FALLBACK-LABEL: float_vrev64: ; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: fmov s0, wzr +; FALLBACK-NEXT: movi d0, #0000000000000000 ; FALLBACK-NEXT: mov.s v0[1], v0[0] ; FALLBACK-NEXT: mov.s v0[2], v0[0] ; FALLBACK-NEXT: adrp x8, .LCPI28_0 diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll index b0d9db3..de32717 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll @@ -1,14 +1,14 @@ -; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=-zcz | FileCheck %s -check-prefixes=ALL,NONEGP,NONEFP +; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=-zcz-gp,+no-zcz-fp | FileCheck %s -check-prefixes=ALL,NONEGP,NONEFP ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz -mattr=+fullfp16 | FileCheck %s -check-prefixes=ALL,ZEROGP,ZERO16 -; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-gp | FileCheck %s -check-prefixes=ALL,ZEROGP,NONEFP -; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-fp | FileCheck %s -check-prefixes=ALL,NONEGP,ZEROFP +; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-gp,+no-zcz-fp | FileCheck %s 
-check-prefixes=ALL,ZEROGP,NONEFP +; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s -check-prefixes=ALL,NONEGP,ZEROFP ; RUN: llc < %s -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s -check-prefixes=ALL,ZEROGP,NONEFP ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=apple-a10 | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP ; RUN: llc < %s -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 | FileCheck %s -check-prefixes=ALL,ZEROGP,NONE16 ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 | FileCheck %s -check-prefixes=ALL,NONEGP,ZEROFP ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=kryo | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP -; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=falkor | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP +; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=falkor | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP declare void @bar(half, float, double, <2 x double>) declare void @bari(i32, i32) diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll index 42c49f7..b49262e 100644 --- a/llvm/test/CodeGen/AArch64/f16-imm.ll +++ b/llvm/test/CodeGen/AArch64/f16-imm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-NOZCZ +; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+no-zcz-fp | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-NOZCZ ; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-ZCZ ; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll index 7f57d5b..8a21719 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ 
-20,7 +20,7 @@ define i1 @test_signed_i1_f32(float %f) nounwind { ; CHECK-LABEL: test_signed_i1_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s1, #-1.00000000 -; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: fmaxnm s1, s0, s1 ; CHECK-NEXT: fminnm s1, s1, s2 ; CHECK-NEXT: fcvtzs w8, s1 @@ -243,7 +243,7 @@ define i1 @test_signed_i1_f64(double %f) nounwind { ; CHECK-LABEL: test_signed_i1_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d1, #-1.00000000 -; CHECK-NEXT: fmov d2, xzr +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: fmaxnm d1, d0, d1 ; CHECK-NEXT: fminnm d1, d1, d2 ; CHECK-NEXT: fcvtzs w8, d1 @@ -462,7 +462,7 @@ define i1 @test_signed_i1_f16(half %f) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: fmov s1, #-1.00000000 -; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: fmaxnm s1, s0, s1 ; CHECK-NEXT: fminnm s1, s1, s2 ; CHECK-NEXT: fcvtzs w8, s1 diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index d0a9c4d..10d8c0b 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -1469,7 +1469,7 @@ define <2 x i1> @test_signed_v2f32_v2i1(<2 x float> %f) { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fmov s2, #-1.00000000 -; CHECK-NEXT: fmov s3, wzr +; CHECK-NEXT: movi d3, #0000000000000000 ; CHECK-NEXT: fmaxnm s4, s1, s2 ; CHECK-NEXT: fcmp s1, s1 ; CHECK-NEXT: fmaxnm s1, s0, s2 @@ -1849,7 +1849,7 @@ define <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fmov d2, #-1.00000000 -; CHECK-NEXT: fmov d3, xzr +; CHECK-NEXT: movi d3, #0000000000000000 ; CHECK-NEXT: fmaxnm d4, d1, d2 ; CHECK-NEXT: fcmp d1, d1 ; CHECK-NEXT: fmaxnm d1, d0, d2 @@ -2212,7 +2212,7 @@ define <4 x i1> @test_signed_v4f16_v4i1(<4 x half> %f) { ; CHECK-NEXT: // kill: def $d0 killed 
$d0 def $q0 ; CHECK-NEXT: fmov s2, #-1.00000000 ; CHECK-NEXT: fcvt s4, h0 -; CHECK-NEXT: fmov s3, wzr +; CHECK-NEXT: movi d3, #0000000000000000 ; CHECK-NEXT: fmaxnm s5, s4, s2 ; CHECK-NEXT: mov h1, v0.h[1] ; CHECK-NEXT: fminnm s5, s5, s3 diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll index ef29e78..6a19210 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -19,7 +19,7 @@ declare i128 @llvm.fptoui.sat.i128.f32(float) define i1 @test_unsigned_i1_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i1_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fmov s1, #1.00000000 ; CHECK-NEXT: fminnm s0, s0, s1 @@ -33,7 +33,7 @@ define i1 @test_unsigned_i1_f32(float %f) nounwind { define i8 @test_unsigned_i8_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i8_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: mov w8, #1132396544 ; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fmov s1, w8 @@ -48,7 +48,7 @@ define i13 @test_unsigned_i13_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i13_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk w8, #17919, lsl #16 ; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fmov s1, w8 @@ -63,7 +63,7 @@ define i16 @test_unsigned_i16_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i16_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #65280 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk w8, #18303, lsl #16 ; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fmov s1, w8 @@ -78,7 +78,7 @@ define i19 @test_unsigned_i19_f32(float %f) nounwind { ; CHECK-LABEL: test_unsigned_i19_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #65504 -; CHECK-NEXT: fmov 
s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk w8, #18687, lsl #16 ; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fmov s1, w8 @@ -198,7 +198,7 @@ declare i128 @llvm.fptoui.sat.i128.f64(double) define i1 @test_unsigned_i1_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i1_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: fmaxnm d0, d0, d1 ; CHECK-NEXT: fmov d1, #1.00000000 ; CHECK-NEXT: fminnm d0, d0, d1 @@ -213,7 +213,7 @@ define i8 @test_unsigned_i8_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i8_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #246290604621824 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16495, lsl #48 ; CHECK-NEXT: fmaxnm d0, d0, d1 ; CHECK-NEXT: fmov d1, x8 @@ -228,7 +228,7 @@ define i13 @test_unsigned_i13_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i13_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #280375465082880 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16575, lsl #48 ; CHECK-NEXT: fmaxnm d0, d0, d1 ; CHECK-NEXT: fmov d1, x8 @@ -243,7 +243,7 @@ define i16 @test_unsigned_i16_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i16_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281337537757184 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16623, lsl #48 ; CHECK-NEXT: fmaxnm d0, d0, d1 ; CHECK-NEXT: fmov d1, x8 @@ -258,7 +258,7 @@ define i19 @test_unsigned_i19_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i19_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281457796841472 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16671, lsl #48 ; CHECK-NEXT: fmaxnm d0, d0, d1 ; CHECK-NEXT: fmov d1, x8 @@ -273,7 +273,7 @@ define i32 @test_unsigned_i32_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i32_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov 
x8, #281474974613504 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: fmaxnm d0, d0, d1 ; CHECK-NEXT: fmov d1, x8 @@ -288,7 +288,7 @@ define i50 @test_unsigned_i50_f64(double %f) nounwind { ; CHECK-LABEL: test_unsigned_i50_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #-8 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #17167, lsl #48 ; CHECK-NEXT: fmaxnm d0, d0, d1 ; CHECK-NEXT: fmov d1, x8 @@ -378,7 +378,7 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind { ; CHECK-LABEL: test_unsigned_i1_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fmov s1, #1.00000000 ; CHECK-NEXT: fminnm s0, s0, s1 @@ -393,7 +393,7 @@ define i8 @test_unsigned_i8_f16(half %f) nounwind { ; CHECK-LABEL: test_unsigned_i8_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: mov w8, #1132396544 ; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fmov s1, w8 @@ -409,7 +409,7 @@ define i13 @test_unsigned_i13_f16(half %f) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #63488 ; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk w8, #17919, lsl #16 ; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fmov s1, w8 @@ -425,7 +425,7 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #65280 ; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk w8, #18303, lsl #16 ; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fmov s1, w8 @@ -441,7 +441,7 @@ define i19 @test_unsigned_i19_f16(half %f) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #65504 ; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, 
#0000000000000000 ; CHECK-NEXT: movk w8, #18687, lsl #16 ; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fmov s1, w8 diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 89233de..1f259ff 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -345,7 +345,7 @@ define <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) { ; CHECK-LABEL: test_unsigned_v1f64_v1i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281474974613504 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: fmaxnm d0, d0, d1 ; CHECK-NEXT: fmov d1, x8 @@ -361,7 +361,7 @@ define <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281474974613504 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fmaxnm d0, d0, d1 @@ -383,7 +383,7 @@ define <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) { ; CHECK-LABEL: test_unsigned_v3f64_v3i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281474974613504 -; CHECK-NEXT: fmov d3, xzr +; CHECK-NEXT: movi d3, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: fmaxnm d0, d0, d3 ; CHECK-NEXT: fmov d4, x8 @@ -411,7 +411,7 @@ define <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) { ; CHECK-LABEL: test_unsigned_v4f64_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281474974613504 -; CHECK-NEXT: fmov d2, xzr +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: mov d3, v0.d[1] ; CHECK-NEXT: mov d4, v1.d[1] @@ -441,7 +441,7 @@ define <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) { ; CHECK-LABEL: test_unsigned_v5f64_v5i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281474974613504 -; CHECK-NEXT: fmov d5, xzr +; CHECK-NEXT: movi 
d5, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: fmaxnm d0, d0, d5 ; CHECK-NEXT: fmov d6, x8 @@ -468,7 +468,7 @@ define <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) { ; CHECK-LABEL: test_unsigned_v6f64_v6i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281474974613504 -; CHECK-NEXT: fmov d6, xzr +; CHECK-NEXT: movi d6, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: fmaxnm d0, d0, d6 ; CHECK-NEXT: fmov d7, x8 @@ -1132,7 +1132,7 @@ define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: fmov s2, #1.00000000 ; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fmaxnm s0, s0, s1 @@ -1153,7 +1153,7 @@ define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: mov w8, #1132396544 ; CHECK-NEXT: mov s2, v0.s[1] ; CHECK-NEXT: fmaxnm s0, s0, s1 @@ -1176,7 +1176,7 @@ define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #63488 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk w8, #17919, lsl #16 ; CHECK-NEXT: mov s2, v0.s[1] ; CHECK-NEXT: fmaxnm s0, s0, s1 @@ -1199,7 +1199,7 @@ define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #65280 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk w8, #18303, lsl #16 ; CHECK-NEXT: mov s2, v0.s[1] ; CHECK-NEXT: fmaxnm s0, s0, s1 @@ -1222,7 +1222,7 @@ define <2 x i19> @test_unsigned_v2f32_v2i19(<2 x float> %f) { ; CHECK: // %bb.0: ; 
CHECK-NEXT: mov w8, #65504 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk w8, #18687, lsl #16 ; CHECK-NEXT: mov s2, v0.s[1] ; CHECK-NEXT: fmaxnm s0, s0, s1 @@ -1433,7 +1433,7 @@ declare <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double>) define <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: fmov d2, #1.00000000 ; CHECK-NEXT: mov d3, v0.d[1] ; CHECK-NEXT: fmaxnm d0, d0, d1 @@ -1454,7 +1454,7 @@ define <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #246290604621824 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16495, lsl #48 ; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fmaxnm d0, d0, d1 @@ -1476,7 +1476,7 @@ define <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i13: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #280375465082880 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16575, lsl #48 ; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fmaxnm d0, d0, d1 @@ -1498,7 +1498,7 @@ define <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281337537757184 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16623, lsl #48 ; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fmaxnm d0, d0, d1 @@ -1520,7 +1520,7 @@ define <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i19: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281457796841472 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16671, lsl #48 ; CHECK-NEXT: mov 
d2, v0.d[1] ; CHECK-NEXT: fmaxnm d0, d0, d1 @@ -1542,7 +1542,7 @@ define <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i32_duplicate: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #281474974613504 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fmaxnm d0, d0, d1 @@ -1564,7 +1564,7 @@ define <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i50: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #-8 -; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk x8, #17167, lsl #48 ; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fmaxnm d0, d0, d1 @@ -1726,7 +1726,7 @@ define <4 x i1> @test_unsigned_v4f16_v4i1(<4 x half> %f) { ; CHECK-NEXT: mov h3, v0.h[1] ; CHECK-NEXT: mov h4, v0.h[2] ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: fcvt s3, h3 ; CHECK-NEXT: fcvt s4, h4 ; CHECK-NEXT: fcvt s0, h0 @@ -1761,7 +1761,7 @@ define <4 x i8> @test_unsigned_v4f16_v4i8(<4 x half> %f) { ; CHECK-NEXT: mov h3, v0.h[1] ; CHECK-NEXT: mov h4, v0.h[2] ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: mov w8, #1132396544 ; CHECK-NEXT: fcvt s3, h3 ; CHECK-NEXT: fcvt s4, h4 @@ -1798,7 +1798,7 @@ define <4 x i13> @test_unsigned_v4f16_v4i13(<4 x half> %f) { ; CHECK-NEXT: mov h3, v0.h[1] ; CHECK-NEXT: mov h4, v0.h[2] ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: movk w8, #17919, lsl #16 ; CHECK-NEXT: fcvt s3, h3 ; CHECK-NEXT: fcvt s4, h4 @@ -1835,7 +1835,7 @@ define <4 x i16> @test_unsigned_v4f16_v4i16(<4 x half> %f) { ; CHECK-NEXT: mov h3, v0.h[1] ; CHECK-NEXT: mov h4, v0.h[2] ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: 
movk w8, #18303, lsl #16 ; CHECK-NEXT: fcvt s3, h3 ; CHECK-NEXT: fcvt s4, h4 @@ -1872,7 +1872,7 @@ define <4 x i19> @test_unsigned_v4f16_v4i19(<4 x half> %f) { ; CHECK-NEXT: mov h3, v0.h[1] ; CHECK-NEXT: mov h4, v0.h[2] ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: movk w8, #18687, lsl #16 ; CHECK-NEXT: fcvt s3, h3 ; CHECK-NEXT: fcvt s4, h4 diff --git a/llvm/test/CodeGen/AArch64/remat-float0.ll b/llvm/test/CodeGen/AArch64/remat-float0.ll index 29af781..0b5d28b 100644 --- a/llvm/test/CodeGen/AArch64/remat-float0.ll +++ b/llvm/test/CodeGen/AArch64/remat-float0.ll @@ -1,15 +1,15 @@ ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s -; Check that float 0 gets rematerialized with an fmov of zero reg instead +; Check that float 0 gets rematerialized with a "movi zero" instead ; of spilled/filled. declare void @bar(float) define void @foo() { ; CHECK-LABEL: foo: -; CHECK: fmov s0, wzr +; CHECK: movi d0, #0000000000000000 ; CHECK: bl bar -; CHECK: fmov s0, wzr +; CHECK: movi d0, #0000000000000000 ; CHECK: bl bar call void @bar(float 0.000000e+00) call void asm sideeffect "", "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"()