From 1531f39268c8973cf9478585fba5c5bbdb6e9c4c Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Tue, 16 Feb 2021 08:59:03 +0100
Subject: [PATCH] openmp: Fix up vectorization simd call badness computation [PR99100]

As mentioned in the PR, ix86_simd_clone_usable didn't make it more desirable
to use 'e' mangled AVX512F entrypoints over 'd' mangled ones (AVX2) with the
same simdlen. This patch fixes that. I have tweaked the generic code too
to make more room for these target specific badness factors.

2021-02-16 Jakub Jelinek

	PR target/99100
	* tree-vect-stmts.c (vectorizable_simd_clone_call): For num_calls != 1
	multiply by 4096 and for inbranch by 8192.
	* config/i386/i386.c (ix86_simd_clone_usable): For TARGET_AVX512F,
	return 3, 2 or 1 for mangle letters 'b', 'c' or 'd'.

	* gcc.target/i386/pr99100.c: New test.
---
 gcc/config/i386/i386.c                  |  6 +++---
 gcc/testsuite/gcc.target/i386/pr99100.c | 22 ++++++++++++++++++++++
 gcc/tree-vect-stmts.c                   |  4 ++--
 3 files changed, 27 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99100.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 48f9aa0..2fe182f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22657,15 +22657,15 @@ ix86_simd_clone_usable (struct cgraph_node *node)
 	return -1;
       if (!TARGET_AVX)
 	return 0;
-      return TARGET_AVX2 ? 2 : 1;
+      return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
     case 'c':
       if (!TARGET_AVX)
 	return -1;
-      return TARGET_AVX2 ? 1 : 0;
+      return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
     case 'd':
       if (!TARGET_AVX2)
 	return -1;
-      return 0;
+      return TARGET_AVX512F ? 1 : 0;
     case 'e':
       if (!TARGET_AVX512F)
 	return -1;
diff --git a/gcc/testsuite/gcc.target/i386/pr99100.c b/gcc/testsuite/gcc.target/i386/pr99100.c
new file mode 100644
index 0000000..a17fe28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99100.c
@@ -0,0 +1,22 @@
+/* PR target/99100 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512f -fopenmp-simd -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler "_ZGVeN8v_myfunc" } } */
+/* { dg-final { scan-assembler "_ZGVeN8v_sin" } } */
+
+#pragma omp declare simd notinbranch
+double sin (double x);
+#pragma omp declare simd simdlen(8) notinbranch
+__attribute__((const)) double myfunc (double x);
+
+#define N 1024
+__attribute__((__aligned__ (256))) double a[N], b[N], c[N];
+
+void
+foo ()
+{
+  for (int i = 0; i < N; i++)
+    a[i] = myfunc (b[i]);
+  for (int i = 0; i < N; i++)
+    c[i] = sin (b[i]);
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 083f38b..85d3161 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3914,9 +3914,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
 	      || n->simdclone->nargs != nargs)
 	    continue;
 	  if (num_calls != 1)
-	    this_badness += exact_log2 (num_calls) * 1024;
+	    this_badness += exact_log2 (num_calls) * 4096;
 	  if (n->simdclone->inbranch)
-	    this_badness += 2048;
+	    this_badness += 8192;
 	  int target_badness = targetm.simd_clone.usable (n);
 	  if (target_badness < 0)
 	    continue;
-- 
2.7.4