From 3a0e583bf17240e55fee7b490e13753cca52bfe1 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 5 Mar 2020 19:44:42 +0100 Subject: [PATCH] i386: Fix some -O0 avx2intrin.h and xopintrin.h intrinsic macros [PR94046] As the testcases show, the macros we have for -O0 for intrinsics that require constant argument(s) should first cast the argument to the type the -O1+ inline uses and afterwards to whatever type e.g. a builtin needs. The PR reported one which violated this, and I've grepped for all double-casts and grepped out from that meaningful casts where the __m{128,256,512}{,d,i} first cast is cast to same sized __v* type and has the same kind of element type (float, double, integral). These 7 macros were using different casts, and I've double checked them against the inline function types. 2020-03-05 Jakub Jelinek PR target/94046 * config/i386/avx2intrin.h (_mm_mask_i32gather_ps): Fix first cast of SRC and MASK arguments to __m128 from __m128d. (_mm256_mask_i32gather_ps): Fix first cast of MASK argument to __m256 from __m256d. (_mm_mask_i64gather_ps): Fix first cast of MASK argument to __m128 from __m128d. * config/i386/xopintrin.h (_mm_permute2_pd): Fix first cast of C argument to __m128i from __m128d. (_mm256_permute2_pd): Fix first cast of C argument to __m256i from __m256d. (_mm_permute2_ps): Fix first cast of C argument to __m128i from __m128. (_mm256_permute2_ps): Fix first cast of C argument to __m256i from __m256. * g++.target/i386/pr94046-1.C: New test. * g++.target/i386/pr94046-2.C: New test. 
--- gcc/ChangeLog | 17 ++++++++++ gcc/config/i386/avx2intrin.h | 8 ++--- gcc/config/i386/xopintrin.h | 8 ++--- gcc/testsuite/ChangeLog | 6 ++++ gcc/testsuite/g++.target/i386/pr94046-1.C | 55 +++++++++++++++++++++++++++++++ gcc/testsuite/g++.target/i386/pr94046-2.C | 5 +++ 6 files changed, 91 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/pr94046-1.C create mode 100644 gcc/testsuite/g++.target/i386/pr94046-2.C diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d54a293..a20f8dc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2020-03-05 Jakub Jelinek + + PR target/94046 + * config/i386/avx2intrin.h (_mm_mask_i32gather_ps): Fix first cast of + SRC and MASK arguments to __m128 from __m128d. + (_mm256_mask_i32gather_ps): Fix first cast of MASK argument to __m256 + from __m256d. + (_mm_mask_i64gather_ps): Fix first cast of MASK argument to __m128 + from __m128d. + * config/i386/xopintrin.h (_mm_permute2_pd): Fix first cast of C + argument to __m128i from __m128d. + (_mm256_permute2_pd): Fix first cast of C argument to __m256i from + __m256d. + (_mm_permute2_ps): Fix first cast of C argument to __m128i from __m128. + (_mm256_permute2_ps): Fix first cast of C argument to __m256i from + __m256. + 2020-03-05 Delia Burduv * config/arm/arm_neon.h (vbfmmlaq_f32): New. 
diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h index f33c55c..4ac21b9 100644 --- a/gcc/config/i386/avx2intrin.h +++ b/gcc/config/i386/avx2intrin.h @@ -1736,10 +1736,10 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base, (int)SCALE) #define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ - (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128d)SRC, \ + (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)SRC, \ (float const *)BASE, \ (__v4si)(__m128i)INDEX, \ - (__v4sf)(__m128d)MASK, \ + (__v4sf)(__m128)MASK, \ (int)SCALE) #define _mm256_i32gather_ps(BASE, INDEX, SCALE) \ @@ -1754,7 +1754,7 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base, (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC, \ (float const *)BASE, \ (__v8si)(__m256i)INDEX, \ - (__v8sf)(__m256d)MASK, \ + (__v8sf)(__m256)MASK, \ (int)SCALE) #define _mm_i64gather_ps(BASE, INDEX, SCALE) \ @@ -1769,7 +1769,7 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base, (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC, \ (float const *)BASE, \ (__v2di)(__m128i)INDEX, \ - (__v4sf)(__m128d)MASK, \ + (__v4sf)(__m128)MASK, \ (int)SCALE) #define _mm256_i64gather_ps(BASE, INDEX, SCALE) \ diff --git a/gcc/config/i386/xopintrin.h b/gcc/config/i386/xopintrin.h index ffdfc13..49bac22 100644 --- a/gcc/config/i386/xopintrin.h +++ b/gcc/config/i386/xopintrin.h @@ -814,25 +814,25 @@ _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I) #define _mm_permute2_pd(X, Y, C, I) \ ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ - (__v2di)(__m128d)(C), \ + (__v2di)(__m128i)(C), \ (int)(I))) #define _mm256_permute2_pd(X, Y, C, I) \ ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \ (__v4df)(__m256d)(Y), \ - (__v4di)(__m256d)(C), \ + (__v4di)(__m256i)(C), \ (int)(I))) #define _mm_permute2_ps(X, Y, C, I) \ ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \ 
(__v4sf)(__m128)(Y), \ - (__v4si)(__m128)(C), \ + (__v4si)(__m128i)(C), \ (int)(I))) #define _mm256_permute2_ps(X, Y, C, I) \ ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \ (__v8sf)(__m256)(Y), \ - (__v8si)(__m256)(C), \ + (__v8si)(__m256i)(C), \ (int)(I))) #endif /* __OPTIMIZE__ */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3bbb1d2..169cede 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2020-03-05 Jakub Jelinek + + PR target/94046 + * g++.target/i386/pr94046-1.C: New test. + * g++.target/i386/pr94046-2.C: New test. + 2020-03-05 Uroš Bizjak * g++.dg/asan/asan_test.C (dg-options): Add diff --git a/gcc/testsuite/g++.target/i386/pr94046-1.C b/gcc/testsuite/g++.target/i386/pr94046-1.C new file mode 100644 index 0000000..f2d4724 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr94046-1.C @@ -0,0 +1,55 @@ +// PR target/94046 +// { dg-do compile } +// { dg-options "-O2 -mavx2 -mxop" } + +#include <x86intrin.h> + +#define S(x) struct x { operator __##x (); }; +S (m128) +S (m128d) +S (m128i) +S (m256) +S (m256d) +S (m256i) + +__m128 +f1 (m128 src, float const *base, m128i idx, m128 mask) +{ + return _mm_mask_i32gather_ps (src, base, idx, mask, 2); +} + +__m256 +f2 (m256 src, float const *base, m256i idx, m256 mask) +{ + return _mm256_mask_i32gather_ps (src, base, idx, mask, 2); +} + +__m128 +f3 (m128 src, float const *base, m128i idx, m128 mask) +{ + return _mm_mask_i64gather_ps (src, base, idx, mask, 2); +} + +__m128d +f4 (m128d x, m128d y, m128i c) +{ + return _mm_permute2_pd (x, y, c, 3); +} + +__m128 +f5 (m128 x, m128 y, m128i c) +{ + return _mm_permute2_ps (x, y, c, 3); +} + +__m256d +f6 (m256d x, m256d y, m256i c) +{ + return _mm256_permute2_pd (x, y, c, 3); +} + +__m256 +f7 (m256 x, m256 y, m256i c) +{ + return _mm256_permute2_ps (x, y, c, 3); +} diff --git a/gcc/testsuite/g++.target/i386/pr94046-2.C b/gcc/testsuite/g++.target/i386/pr94046-2.C new file mode 100644 index 0000000..abc8c3e --- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr94046-2.C @@ -0,0 +1,5 @@ +// PR target/94046 +// { dg-do compile } +// { dg-options "-O0 -mavx2 -mxop" } + +#include "pr94046-1.C" -- 2.7.4