From 613196462a62a28de8414b9023ec2be9a29ac3dc Mon Sep 17 00:00:00 2001 From: liuhongt Date: Fri, 24 Sep 2021 19:17:42 +0800 Subject: [PATCH] Simplify (_Float16) ceil ((double) x) to .CEIL (x) when available. gcc/ChangeLog: PR target/102464 * config/i386/i386.c (ix86_optab_supported_p): Return true for HFmode. * match.pd: Simplify (_Float16) ceil ((double) x) to __builtin_ceilf16 (x) when x is _Float16 type and direct_internal_fn_supported_p. gcc/testsuite/ChangeLog: * gcc.target/i386/pr102464.c: New test. --- gcc/config/i386/i386.c | 20 +++++++++------- gcc/match.pd | 29 ++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr102464.c | 39 ++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr102464.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a566d84..7bc1b05 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -23582,20 +23582,24 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, return opt_type == OPTIMIZE_FOR_SPEED; case rint_optab: - if (SSE_FLOAT_MODE_P (mode1) - && TARGET_SSE_MATH - && !flag_trapping_math - && !TARGET_SSE4_1) + if (mode1 == HFmode) + return true; + else if (SSE_FLOAT_MODE_P (mode1) + && TARGET_SSE_MATH + && !flag_trapping_math + && !TARGET_SSE4_1) return opt_type == OPTIMIZE_FOR_SPEED; return true; case floor_optab: case ceil_optab: case btrunc_optab: - if (SSE_FLOAT_MODE_P (mode1) - && TARGET_SSE_MATH - && !flag_trapping_math - && TARGET_SSE4_1) + if (mode1 == HFmode) + return true; + else if (SSE_FLOAT_MODE_P (mode1) + && TARGET_SSE_MATH + && !flag_trapping_math + && TARGET_SSE4_1) return true; return opt_type == OPTIMIZE_FOR_SPEED; diff --git a/gcc/match.pd b/gcc/match.pd index a9791ce..9d7c1ac 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -6191,6 +6191,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (froms (convert float_value_p@0)) (convert (tos @0))))) +#if GIMPLE +(match float16_value_p + @0 + (if 
(TYPE_MAIN_VARIANT (TREE_TYPE (@0)) == float16_type_node))) +(for froms (BUILT_IN_TRUNCL BUILT_IN_TRUNC BUILT_IN_TRUNCF + BUILT_IN_FLOORL BUILT_IN_FLOOR BUILT_IN_FLOORF + BUILT_IN_CEILL BUILT_IN_CEIL BUILT_IN_CEILF + BUILT_IN_ROUNDEVENL BUILT_IN_ROUNDEVEN BUILT_IN_ROUNDEVENF + BUILT_IN_ROUNDL BUILT_IN_ROUND BUILT_IN_ROUNDF + BUILT_IN_NEARBYINTL BUILT_IN_NEARBYINT BUILT_IN_NEARBYINTF + BUILT_IN_RINTL BUILT_IN_RINT BUILT_IN_RINTF) + tos (IFN_TRUNC IFN_TRUNC IFN_TRUNC + IFN_FLOOR IFN_FLOOR IFN_FLOOR + IFN_CEIL IFN_CEIL IFN_CEIL + IFN_ROUNDEVEN IFN_ROUNDEVEN IFN_ROUNDEVEN + IFN_ROUND IFN_ROUND IFN_ROUND + IFN_NEARBYINT IFN_NEARBYINT IFN_NEARBYINT + IFN_RINT IFN_RINT IFN_RINT) + /* (_Float16) round ((double) x) -> __builtin_roundf16 (x), etc., + if x is a _Float16. */ + (simplify + (convert (froms (convert float16_value_p@0))) + (if (optimize + && types_match (type, TREE_TYPE (@0)) + && direct_internal_fn_supported_p (as_internal_fn (tos), + type, OPTIMIZE_FOR_BOTH)) + (tos @0)))) +#endif + (for froms (XFLOORL XCEILL XROUNDL XRINTL) tos (XFLOOR XCEIL XROUND XRINT) /* llfloorl(extend(x)) -> llfloor(x), etc., if x is a double. */ diff --git a/gcc/testsuite/gcc.target/i386/pr102464.c b/gcc/testsuite/gcc.target/i386/pr102464.c new file mode 100644 index 0000000..e3e060e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102464.c @@ -0,0 +1,39 @@ +/* PR target/102464. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +#define FOO(FUNC,SUFFIX) \ + _Float16 \ + foo_##FUNC##_##SUFFIX (_Float16 a) \ + { \ + return __builtin_##FUNC##SUFFIX (a); \ + } + +FOO (roundeven, f16); +FOO (roundeven, f); +FOO (roundeven, ); +FOO (roundeven, l); +FOO (trunc, f16); +FOO (trunc, f); +FOO (trunc, ); +FOO (trunc, l); +FOO (ceil, f16); +FOO (ceil, f); +FOO (ceil, ); +FOO (ceil, l); +FOO (floor, f16); +FOO (floor, f); +FOO (floor, ); +FOO (floor, l); +FOO (nearbyint, f16); +FOO (nearbyint, f); +FOO (nearbyint, ); +FOO (nearbyint, l); +FOO (rint, f16); +FOO (rint, f); +FOO (rint, ); +FOO (rint, l); + +/* { dg-final { scan-assembler-not "vcvtsh2s\[sd\]" } } */ +/* { dg-final { scan-assembler-not "extendhfxf" } } */ +/* { dg-final { scan-assembler-times "vrndscalesh\[^\n\r\]*xmm\[0-9\]" 24 } } */ -- 2.7.4