From 4b3f49649a76afc3ed1b75c7e68d641377e22ae1 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 23 May 2018 17:13:31 +0200 Subject: [PATCH] i386.md (*floatuns2_avx512): New insn pattern. * config/i386/i386.md (*floatuns2_avx512): New insn pattern. (floatunssi2): Also enable for AVX512F and TARGET_SSE_MATH. Rewrite expander pattern. Emit gen_floatunssi2_i387_with_xmm for non-SSE modes. (floatunsdisf2): Rewrite expander pattern. Handle TARGET_AVX512F. (floatunsdidf2): Ditto. * config/i386/i386.md (fixuns_truncdi2): New insn pattern. (fixuns_truncsi2_avx512f): Ditto. (*fixuns_truncsi2_avx512f_zext): Ditto. (fixuns_truncsi2): Also enable for AVX512F and TARGET_SSE_MATH. Emit fixuns_truncsi2_avx512f for AVX512F targets. testsuite/ChangeLog: * gcc.target/i386/cvt-2.c: New test. * gcc.target/i386/cvt-3.c: New test. From-SVN: r260614 --- gcc/ChangeLog | 18 +++++ gcc/config/i386/i386.md | 121 ++++++++++++++++++++++++++-------- gcc/testsuite/ChangeLog | 8 +++ gcc/testsuite/gcc.target/i386/cvt-2.c | 15 +++++ gcc/testsuite/gcc.target/i386/cvt-3.c | 15 +++++ 5 files changed, 149 insertions(+), 28 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/cvt-2.c create mode 100644 gcc/testsuite/gcc.target/i386/cvt-3.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 270c979..e9268d6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2018-05-23 Uros Bizjak + + * config/i386/i386.md (*floatuns2_avx512): + New insn pattern. + (floatunssi2): Also enable for AVX512F and TARGET_SSE_MATH. + Rewrite expander pattern. Emit gen_floatunssi2_i387_with_xmm + for non-SSE modes. + (floatunsdisf2): Rewrite expander pattern. Handle TARGET_AVX512F. + (floatunsdidf2): Ditto. + +2018-05-23 Uros Bizjak + + * config/i386/i386.md (fixuns_truncdi2): New insn pattern. + (fixuns_truncsi2_avx512f): Ditto. + (*fixuns_truncsi2_avx512f_zext): Ditto. + (fixuns_truncsi2): Also enable for AVX512F and TARGET_SSE_MATH. + Emit fixuns_truncsi2_avx512f for AVX512F targets. 
+ 2018-05-23 Alexander Monakov PR rtl-optimization/79985 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index cc993b3..bde3c34 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -5017,6 +5017,18 @@ } }) +;; Unsigned conversion to DImode + +(define_insn "fixuns_truncdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unsigned_fix:DI + (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] + "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH" + "vcvtt2usi\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "DI")]) + ;; Unsigned conversion to SImode. (define_expand "fixuns_truncsi2" @@ -5027,13 +5039,19 @@ (use (match_dup 2)) (clobber (match_scratch: 3)) (clobber (match_scratch: 4))])] - "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH" + "(!TARGET_64BIT || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH" { machine_mode mode = mode; machine_mode vecmode = mode; REAL_VALUE_TYPE TWO31r; rtx two31; + if (TARGET_AVX512F) + { + emit_insn (gen_fixuns_truncsi2_avx512f (operands[0], operands[1])); + DONE; + } + if (optimize_insn_for_size_p ()) FAIL; @@ -5043,6 +5061,27 @@ operands[2] = force_reg (vecmode, two31); }) +(define_insn "fixuns_truncsi2_avx512f" + [(set (match_operand:SI 0 "register_operand" "=r") + (unsigned_fix:SI + (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512F && TARGET_SSE_MATH" + "vcvtt2usi\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "SI")]) + +(define_insn "*fixuns_truncsi2_avx512f_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unsigned_fix:SI + (match_operand:MODEF 1 "nonimmediate_operand" "vm"))))] + "TARGET_64BIT && TARGET_AVX512F" + "vcvtt2usi\t{%1, %k0|%k0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "SI")]) + (define_insn_and_split "*fixuns_trunc_1" [(set (match_operand:SI 0 "register_operand" "=&x,&x") 
(unsigned_fix:SI @@ -5615,16 +5654,26 @@ DONE; }) +(define_insn "*floatuns2_avx512" + [(set (match_operand:MODEF 0 "register_operand" "=v") + (unsigned_float:MODEF + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_AVX512F && TARGET_SSE_MATH" + "vcvtusi2\t{%1, %0, %0|%0, %0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + ;; Avoid store forwarding (partial memory) stall penalty by extending ;; SImode value to DImode through XMM register instead of pushing two ;; SImode values to stack. Also note that fild loads from memory only. -(define_insn_and_split "*floatunssi2_i387_with_xmm" +(define_insn_and_split "floatunssi2_i387_with_xmm" [(set (match_operand:X87MODEF 0 "register_operand" "=f") (unsigned_float:X87MODEF (match_operand:SI 1 "nonimmediate_operand" "rm"))) - (clobber (match_scratch:DI 3 "=x")) - (clobber (match_operand:DI 2 "memory_operand" "=m"))] + (clobber (match_operand:DI 2 "memory_operand" "=m")) + (clobber (match_scratch:DI 3 "=x"))] "!TARGET_64BIT && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC" @@ -5639,43 +5688,59 @@ (set_attr "mode" "")]) (define_expand "floatunssi2" - [(parallel - [(set (match_operand:X87MODEF 0 "register_operand") - (unsigned_float:X87MODEF - (match_operand:SI 1 "nonimmediate_operand"))) - (clobber (match_scratch:DI 3)) - (clobber (match_dup 2))])] - "!TARGET_64BIT - && ((TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC) - || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" + [(set (match_operand:X87MODEF 0 "register_operand") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand")))] + "(!TARGET_64BIT + && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) + && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC) + || ((!TARGET_64BIT || TARGET_AVX512F) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" { - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + if 
(!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_floatunssi2_i387_with_xmm + (operands[0], operands[1], + assign_386_stack_local (DImode, SLOT_TEMP))); + DONE; + } + if (!TARGET_AVX512F) { ix86_expand_convert_uns_si_sse (operands[0], operands[1]); DONE; } - else - operands[2] = assign_386_stack_local (DImode, SLOT_TEMP); }) (define_expand "floatunsdisf2" - [(use (match_operand:SF 0 "register_operand")) - (use (match_operand:DI 1 "nonimmediate_operand"))] + [(set (match_operand:SF 0 "register_operand") + (unsigned_float:SF + (match_operand:DI 1 "nonimmediate_operand")))] "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH" - "x86_emit_floatuns (operands); DONE;") +{ + if (!TARGET_AVX512F) + { + x86_emit_floatuns (operands); + DONE; + } +}) (define_expand "floatunsdidf2" - [(use (match_operand:DF 0 "register_operand")) - (use (match_operand:DI 1 "nonimmediate_operand"))] - "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK) + [(set (match_operand:DF 0 "register_operand") + (unsigned_float:DF + (match_operand:DI 1 "nonimmediate_operand")))] + "(TARGET_KEEPS_VECTOR_ALIGNED_STACK || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH" { - if (TARGET_64BIT) - x86_emit_floatuns (operands); - else - ix86_expand_convert_uns_didf_sse (operands[0], operands[1]); - DONE; + if (!TARGET_64BIT) + { + ix86_expand_convert_uns_didf_sse (operands[0], operands[1]); + DONE; + } + if (!TARGET_AVX512F) + { + x86_emit_floatuns (operands); + DONE; + } }) ;; Load effective address instructions diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5b16f44..f45de09 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2018-05-23 Uros Bizjak + + * gcc.target/i386/cvt-3.c: New test. + +2018-05-23 Uros Bizjak + + * gcc.target/i386/cvt-2.c: New test. + 2018-05-23 Alexander Monakov * gcc.dg/pr79985.c: New testcase. 
diff --git a/gcc/testsuite/gcc.target/i386/cvt-2.c b/gcc/testsuite/gcc.target/i386/cvt-2.c new file mode 100644 index 0000000..aa0fd86 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cvt-2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mfpmath=sse" } */ + +unsigned int f2ui (float x) { return x; } +unsigned int d2ui (double x) { return x; } + +#ifdef __x86_64__ +unsigned long f2ul (float x) { return x; } +unsigned long d2ul (double x) { return x; } +#endif + +/* { dg-final { scan-assembler-times "vcvttss2usi" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vcvttsd2usi" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vcvttss2usi" 2 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttsd2usi" 2 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/cvt-3.c b/gcc/testsuite/gcc.target/i386/cvt-3.c new file mode 100644 index 0000000..132ea6e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cvt-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mfpmath=sse" } */ + +float ui2f (unsigned int x) { return x; } +double ui2d (unsigned int x) { return x; } + +#ifdef __x86_64__ +float ul2f (unsigned long x) { return x; } +double ul2d (unsigned long x) { return x; } +#endif + +/* { dg-final { scan-assembler-times "vcvtusi2ss" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vcvtusi2sd" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vcvtusi2ss" 2 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvtusi2sd" 2 { target { ! ia32 } } } } */ -- 2.7.4