From 2069d6fcff179ad5519d7c59d52308a60af51e48 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 15 Nov 2014 17:56:27 +0100 Subject: [PATCH] xmmintrin.h (_mm_add_ps, [...]): Use vector extensions instead of builtins. 2014-11-15 Marc Glisse gcc/ * config/i386/xmmintrin.h (_mm_add_ps, _mm_sub_ps, _mm_mul_ps, _mm_div_ps, _mm_store_ss, _mm_cvtss_f32): Use vector extensions instead of builtins. * config/i386/emmintrin.h (__v2du, __v4su, __v8hu, __v16qu): New typedefs. (_mm_sqrt_sd): Fix comment. (_mm_add_epi8, _mm_add_epi16, _mm_add_epi32, _mm_add_epi64, _mm_sub_epi8, _mm_sub_epi16, _mm_sub_epi32, _mm_sub_epi64, _mm_mullo_epi16, _mm_cmpeq_epi8, _mm_cmpeq_epi16, _mm_cmpeq_epi32, _mm_cmplt_epi8, _mm_cmplt_epi16, _mm_cmplt_epi32, _mm_cmpgt_epi8, _mm_cmpgt_epi16, _mm_cmpgt_epi32, _mm_and_si128, _mm_or_si128, _mm_xor_si128, _mm_store_sd, _mm_cvtsd_f64, _mm_storeh_pd, _mm_cvtsi128_si64, _mm_cvtsi128_si64x, _mm_add_pd, _mm_sub_pd, _mm_mul_pd, _mm_div_pd, _mm_storel_epi64, _mm_movepi64_pi64): Use vector extensions instead of builtins. * config/i386/smmintrin.h (_mm_cmpeq_epi64, _mm_cmpgt_epi64, _mm_mullo_epi32): Likewise. * config/i386/avxintrin.h (__v4du, __v8su, __v16hu, __v32qu): New typedefs. (_mm256_add_pd, _mm256_add_ps, _mm256_div_pd, _mm256_div_ps, _mm256_mul_pd, _mm256_mul_ps, _mm256_sub_pd, _mm256_sub_ps): Use vector extensions instead of builtins. * config/i386/avx2intrin.h (_mm256_cmpeq_epi8, _mm256_cmpeq_epi16, _mm256_cmpeq_epi32, _mm256_cmpeq_epi64, _mm256_cmpgt_epi8, _mm256_cmpgt_epi16, _mm256_cmpgt_epi32, _mm256_cmpgt_epi64, _mm256_and_si256, _mm256_or_si256, _mm256_xor_si256, _mm256_add_epi8, _mm256_add_epi16, _mm256_add_epi32, _mm256_add_epi64, _mm256_mullo_epi16, _mm256_mullo_epi32, _mm256_sub_epi8, _mm256_sub_epi16, _mm256_sub_epi32, _mm256_sub_epi64): Likewise. * config/i386/avx512fintrin.h (__v8du, __v16su, __v32hu, __v64qu): New typedefs. 
(_mm512_or_si512, _mm512_or_epi32, _mm512_or_epi64, _mm512_xor_si512, _mm512_xor_epi32, _mm512_xor_epi64, _mm512_and_si512, _mm512_and_epi32, _mm512_and_epi64, _mm512_mullo_epi32, _mm512_add_epi64, _mm512_sub_epi64, _mm512_add_epi32, _mm512_sub_epi32, _mm512_add_pd, _mm512_add_ps, _mm512_sub_pd, _mm512_sub_ps, _mm512_mul_pd, _mm512_mul_ps, _mm512_div_pd, _mm512_div_ps): Use vector extensions instead of builtins. * config/i386/avx512bwintrin.h (_mm512_mullo_epi16, _mm512_add_epi8, _mm512_sub_epi8, _mm512_sub_epi16, _mm512_add_epi16): Likewise. * config/i386/avx512dqintrin.h (_mm512_mullo_epi64): Likewise. * config/i386/avx512vldqintrin.h (_mm256_mullo_epi64, _mm_mullo_epi64): Likewise. gcc/testsuite/ * gcc.target/i386/intrinsics_opt-1.c: New testcase. * gcc.target/i386/intrinsics_opt-2.c: Likewise. * gcc.target/i386/intrinsics_opt-3.c: Likewise. * gcc.target/i386/intrinsics_opt-4.c: Likewise. From-SVN: r217608 --- gcc/ChangeLog | 46 ++++++++ gcc/config/i386/avx2intrin.h | 45 ++++--- gcc/config/i386/avx512bwintrin.h | 30 +---- gcc/config/i386/avx512dqintrin.h | 6 +- gcc/config/i386/avx512fintrin.h | 144 ++++------------------- gcc/config/i386/avx512vldqintrin.h | 12 +- gcc/config/i386/avxintrin.h | 20 ++-- gcc/config/i386/emmintrin.h | 70 +++++------ gcc/config/i386/smmintrin.h | 6 +- gcc/config/i386/xmmintrin.h | 12 +- gcc/testsuite/ChangeLog | 7 ++ gcc/testsuite/gcc.target/i386/intrinsics_opt-1.c | 11 ++ gcc/testsuite/gcc.target/i386/intrinsics_opt-2.c | 16 +++ gcc/testsuite/gcc.target/i386/intrinsics_opt-3.c | 13 ++ gcc/testsuite/gcc.target/i386/intrinsics_opt-4.c | 12 ++ 15 files changed, 218 insertions(+), 232 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/intrinsics_opt-1.c create mode 100644 gcc/testsuite/gcc.target/i386/intrinsics_opt-2.c create mode 100644 gcc/testsuite/gcc.target/i386/intrinsics_opt-3.c create mode 100644 gcc/testsuite/gcc.target/i386/intrinsics_opt-4.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bb82d0e..e705396 100644 
--- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,49 @@ +2014-11-15 Marc Glisse + + * config/i386/xmmintrin.h (_mm_add_ps, _mm_sub_ps, _mm_mul_ps, + _mm_div_ps, _mm_store_ss, _mm_cvtss_f32): Use vector extensions + instead of builtins. + * config/i386/emmintrin.h (__v2du, __v4su, __v8hu, __v16qu): New + typedefs. + (_mm_sqrt_sd): Fix comment. + (_mm_add_epi8, _mm_add_epi16, _mm_add_epi32, _mm_add_epi64, + _mm_sub_epi8, _mm_sub_epi16, _mm_sub_epi32, _mm_sub_epi64, + _mm_mullo_epi16, _mm_cmpeq_epi8, _mm_cmpeq_epi16, _mm_cmpeq_epi32, + _mm_cmplt_epi8, _mm_cmplt_epi16, _mm_cmplt_epi32, _mm_cmpgt_epi8, + _mm_cmpgt_epi16, _mm_cmpgt_epi32, _mm_and_si128, _mm_or_si128, + _mm_xor_si128, _mm_store_sd, _mm_cvtsd_f64, _mm_storeh_pd, + _mm_cvtsi128_si64, _mm_cvtsi128_si64x, _mm_add_pd, _mm_sub_pd, + _mm_mul_pd, _mm_div_pd, _mm_storel_epi64, _mm_movepi64_pi64): + Use vector extensions instead of builtins. + * config/i386/smmintrin.h (_mm_cmpeq_epi64, _mm_cmpgt_epi64, + _mm_mullo_epi32): Likewise. + * config/i386/avxintrin.h (__v4du, __v8su, __v16hu, __v32qu): + New typedefs. + (_mm256_add_pd, _mm256_add_ps, _mm256_div_pd, _mm256_div_ps, + _mm256_mul_pd, _mm256_mul_ps, _mm256_sub_pd, _mm256_sub_ps): + Use vector extensions instead of builtins. + * config/i386/avx2intrin.h (_mm256_cmpeq_epi8, _mm256_cmpeq_epi16, + _mm256_cmpeq_epi32, _mm256_cmpeq_epi64, _mm256_cmpgt_epi8, + _mm256_cmpgt_epi16, _mm256_cmpgt_epi32, _mm256_cmpgt_epi64, + _mm256_and_si256, _mm256_or_si256, _mm256_xor_si256, _mm256_add_epi8, + _mm256_add_epi16, _mm256_add_epi32, _mm256_add_epi64, + _mm256_mullo_epi16, _mm256_mullo_epi32, _mm256_sub_epi8, + _mm256_sub_epi16, _mm256_sub_epi32, _mm256_sub_epi64): Likewise. + * config/i386/avx512fintrin.h (__v8du, __v16su, __v32hu, __v64qu): + New typedefs. 
+ (_mm512_or_si512, _mm512_or_epi32, _mm512_or_epi64, _mm512_xor_si512, + _mm512_xor_epi32, _mm512_xor_epi64, _mm512_and_si512, + _mm512_and_epi32, _mm512_and_epi64, _mm512_mullo_epi32, + _mm512_add_epi64, _mm512_sub_epi64, _mm512_add_epi32, + _mm512_sub_epi32, _mm512_add_pd, _mm512_add_ps, _mm512_sub_pd, + _mm512_sub_ps, _mm512_mul_pd, _mm512_mul_ps, _mm512_div_pd, + _mm512_div_ps): Use vector extensions instead of builtins. + * config/i386/avx512bwintrin.h (_mm512_mullo_epi16, _mm512_add_epi8, + _mm512_sub_epi8, _mm512_sub_epi16, _mm512_add_epi16): Likewise. + * config/i386/avx512dqintrin.h (_mm512_mullo_epi64): Likewise. + * config/i386/avx512vldqintrin.h (_mm256_mullo_epi64, _mm_mullo_epi64): + Likewise. + 2014-11-15 Jan Hubicka * lto-streamer-out.c (hash_tree): Use cl_optimization_hash. diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h index d04c972..bca9c9e 100644 --- a/gcc/config/i386/avx2intrin.h +++ b/gcc/config/i386/avx2intrin.h @@ -104,28 +104,28 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_epi8 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_paddb256 ((__v32qi)__A, (__v32qi)__B); + return (__m256i) ((__v32qu)__A + (__v32qu)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_epi16 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_paddw256 ((__v16hi)__A, (__v16hi)__B); + return (__m256i) ((__v16hu)__A + (__v16hu)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_epi32 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_paddd256 ((__v8si)__A, (__v8si)__B); + return (__m256i) ((__v8su)__A + (__v8su)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_paddq256 ((__v4di)__A, (__v4di)__B); + return 
(__m256i) ((__v4du)__A + (__v4du)__B); } extern __inline __m256i @@ -178,7 +178,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_and_si256 (__m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_andsi256 ((__v4di)__A, (__v4di)__B); + return (__m256i) ((__v4du)__A & (__v4du)__B); } extern __inline __m256i @@ -230,59 +230,56 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpeq_epi8 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pcmpeqb256 ((__v32qi)__A, (__v32qi)__B); + return (__m256i) ((__v32qi)__A == (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpeq_epi16 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pcmpeqw256 ((__v16hi)__A, (__v16hi)__B); + return (__m256i) ((__v16hi)__A == (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpeq_epi32 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pcmpeqd256 ((__v8si)__A, (__v8si)__B); + return (__m256i) ((__v8si)__A == (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpeq_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pcmpeqq256 ((__v4di)__A, (__v4di)__B); + return (__m256i) ((__v4di)__A == (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpgt_epi8 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pcmpgtb256 ((__v32qi)__A, - (__v32qi)__B); + return (__m256i) ((__v32qi)__A > (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpgt_epi16 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pcmpgtw256 ((__v16hi)__A, - (__v16hi)__B); + return (__m256i) ((__v16hi)__A > (__v16hi)__B); } extern __inline 
__m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpgt_epi32 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pcmpgtd256 ((__v8si)__A, - (__v8si)__B); + return (__m256i) ((__v8si)__A > (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpgt_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pcmpgtq256 ((__v4di)__A, (__v4di)__B); + return (__m256i) ((__v4di)__A > (__v4di)__B); } extern __inline __m256i @@ -555,14 +552,14 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mullo_epi16 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pmullw256 ((__v16hi)__A, (__v16hi)__B); + return (__m256i) ((__v16hu)__A * (__v16hu)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mullo_epi32 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pmulld256 ((__v8si)__A, (__v8si)__B); + return (__m256i) ((__v8su)__A * (__v8su)__B); } extern __inline __m256i @@ -576,7 +573,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_or_si256 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_por256 ((__v4di)__A, (__v4di)__B); + return (__m256i) ((__v4du)__A | (__v4du)__B); } extern __inline __m256i @@ -785,28 +782,28 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_epi8 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_psubb256 ((__v32qi)__A, (__v32qi)__B); + return (__m256i) ((__v32qu)__A - (__v32qu)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_epi16 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_psubw256 ((__v16hi)__A, (__v16hi)__B); + return (__m256i) ((__v16hu)__A - (__v16hu)__B); } extern __inline __m256i __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_epi32 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_psubd256 ((__v8si)__A, (__v8si)__B); + return (__m256i) ((__v8su)__A - (__v8su)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_psubq256 ((__v4di)__A, (__v4di)__B); + return (__m256i) ((__v4du)__A - (__v4du)__B); } extern __inline __m256i @@ -897,7 +894,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_xor_si256 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pxor256 ((__v4di)__A, (__v4di)__B); + return (__m256i) ((__v4du)__A ^ (__v4du)__B); } extern __inline __m256i diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index d70cae0..41a9a61 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -464,11 +464,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mullo_epi16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) - _mm512_setzero_hi (), - (__mmask32) -1); + return (__m512i) ((__v32hu) __A * (__v32hu) __B); } extern __inline __m512i @@ -673,11 +669,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_add_epi8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) - _mm512_setzero_qi (), - (__mmask64) -1); + return (__m512i) ((__v64qu) __A + (__v64qu) __B); } extern __inline __m512i @@ -706,11 +698,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_sub_epi8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) - 
_mm512_setzero_qi (), - (__mmask64) -1); + return (__m512i) ((__v64qu) __A - (__v64qu) __B); } extern __inline __m512i @@ -904,11 +892,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_sub_epi16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) - _mm512_setzero_hi (), - (__mmask32) -1); + return (__m512i) ((__v32hu) __A - (__v32hu) __B); } extern __inline __m512i @@ -1003,11 +987,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_add_epi16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) - _mm512_setzero_hi (), - (__mmask32) -1); + return (__m512i) ((__v32hu) __A + (__v32hu) __B); } extern __inline __m512i diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index 6014a1c..82f181c 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -225,11 +225,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mullo_epi64 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1); + return (__m512i) ((__v8du) __A * (__v8du) __B); } extern __inline __m512i diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 3f362d4..66f5199 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -38,9 +38,13 @@ typedef double __v8df __attribute__ ((__vector_size__ (64))); typedef float __v16sf __attribute__ ((__vector_size__ (64))); typedef long long __v8di __attribute__ ((__vector_size__ (64))); +typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64))); typedef int __v16si __attribute__ ((__vector_size__ (64))); +typedef unsigned int __v16su __attribute__ 
((__vector_size__ (64))); typedef short __v32hi __attribute__ ((__vector_size__ (64))); +typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64))); typedef char __v64qi __attribute__ ((__vector_size__ (64))); +typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64))); /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ @@ -515,11 +519,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mullo_epi32 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); + return (__m512i) ((__v16su) __A * (__v16su) __B); } extern __inline __m512i @@ -642,11 +642,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_add_epi64 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); + return (__m512i) ((__v8du) __A + (__v8du) __B); } extern __inline __m512i @@ -674,11 +670,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_sub_epi64 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_undefined_pd (), - (__mmask8) -1); + return (__m512i) ((__v8du) __A - (__v8du) __B); } extern __inline __m512i @@ -802,11 +794,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_add_epi32 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); + return (__m512i) ((__v16su) __A + (__v16su) __B); } extern __inline __m512i @@ -865,11 +853,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) _mm512_sub_epi32 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); + return (__m512i) ((__v16su) __A - (__v16su) __B); } extern __inline __m512i @@ -6797,22 +6781,14 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_or_si512 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); + return (__m512i) ((__v16su) __A | (__v16su) __B); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_or_epi32 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); + return (__m512i) ((__v16su) __A | (__v16su) __B); } extern __inline __m512i @@ -6840,11 +6816,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_or_epi64 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); + return (__m512i) ((__v8du) __A | (__v8du) __B); } extern __inline __m512i @@ -6872,22 +6844,14 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_xor_si512 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); + return (__m512i) ((__v16su) __A ^ (__v16su) __B); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_xor_epi32 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_undefined_si512 (), 
- (__mmask16) -1); + return (__m512i) ((__v16su) __A ^ (__v16su) __B); } extern __inline __m512i @@ -6915,11 +6879,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_xor_epi64 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); + return (__m512i) ((__v8du) __A ^ (__v8du) __B); } extern __inline __m512i @@ -7128,22 +7088,14 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_and_si512 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); + return (__m512i) ((__v16su) __A & (__v16su) __B); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_and_epi32 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); + return (__m512i) ((__v16su) __A & (__v16su) __B); } extern __inline __m512i @@ -7171,11 +7123,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_and_epi64 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); + return (__m512i) ((__v8du) __A & (__v8du) __B); } extern __inline __m512i @@ -10749,12 +10697,7 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_add_pd (__m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) ((__v8df)__A + (__v8df)__B); } extern __inline __m512d @@ -10784,12 +10727,7 @@ extern __inline 
__m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_add_ps (__m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) ((__v16sf)__A + (__v16sf)__B); } extern __inline __m512 @@ -10819,12 +10757,7 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_sub_pd (__m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) ((__v8df)__A - (__v8df)__B); } extern __inline __m512d @@ -10854,12 +10787,7 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_sub_ps (__m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) ((__v16sf)__A - (__v16sf)__B); } extern __inline __m512 @@ -10889,12 +10817,7 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mul_pd (__m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) ((__v8df)__A * (__v8df)__B); } extern __inline __m512d @@ -10924,12 +10847,7 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mul_ps (__m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) ((__v16sf)__A * (__v16sf)__B); } extern __inline __m512 @@ -10959,12 +10877,7 @@ extern __inline __m512d 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_div_pd (__m512d __M, __m512d __V) { - return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, - (__v8df) __V, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) ((__v8df)__M / (__v8df)__V); } extern __inline __m512d @@ -10994,12 +10907,7 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_div_ps (__m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) ((__v16sf)__A / (__v16sf)__B); } extern __inline __m512 diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h index 4ad1fd7..2be1bf7 100644 --- a/gcc/config/i386/avx512vldqintrin.h +++ b/gcc/config/i386/avx512vldqintrin.h @@ -544,11 +544,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mullo_epi64 (__m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__m256i) ((__v4du) __A * (__v4du) __B); } extern __inline __m256i @@ -577,11 +573,7 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mullo_epi64 (__m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_di (), - (__mmask8) -1); + return (__m128i) ((__v2du) __A * (__v2du) __B); } extern __inline __m128i diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index 2ea327c..65e2145 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -41,9 +41,13 @@ typedef double __v4df __attribute__ ((__vector_size__ (32))); typedef float __v8sf __attribute__ ((__vector_size__ (32))); typedef 
long long __v4di __attribute__ ((__vector_size__ (32))); +typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32))); typedef int __v8si __attribute__ ((__vector_size__ (32))); +typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))); typedef short __v16hi __attribute__ ((__vector_size__ (32))); +typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))); typedef char __v32qi __attribute__ ((__vector_size__ (32))); +typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))); /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ @@ -124,13 +128,13 @@ typedef double __m256d __attribute__ ((__vector_size__ (32), extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_pd (__m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_addpd256 ((__v4df)__A, (__v4df)__B); + return (__m256d) ((__v4df)__A + (__v4df)__B); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_ps (__m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_addps256 ((__v8sf)__A, (__v8sf)__B); + return (__m256) ((__v8sf)__A + (__v8sf)__B); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -218,13 +222,13 @@ _mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M) extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_div_pd (__m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_divpd256 ((__v4df)__A, (__v4df)__B); + return (__m256d) ((__v4df)__A / (__v4df)__B); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_div_ps (__m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_divps256 ((__v8sf)__A, (__v8sf)__B); + return (__m256) ((__v8sf)__A / (__v8sf)__B); } /* Dot product instructions with mask-defined summing and zeroing parts @@ -295,13 +299,13 
@@ _mm256_min_ps (__m256 __A, __m256 __B) extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mul_pd (__m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_mulpd256 ((__v4df)__A, (__v4df)__B); + return (__m256d) ((__v4df)__A * (__v4df)__B); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mul_ps (__m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_mulps256 ((__v8sf)__A, (__v8sf)__B); + return (__m256) ((__v8sf)__A * (__v8sf)__B); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -343,13 +347,13 @@ _mm256_shuffle_ps (__m256 __A, __m256 __B, const int __mask) extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_pd (__m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_subpd256 ((__v4df)__A, (__v4df)__B); + return (__m256d) ((__v4df)__A - (__v4df)__B); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_ps (__m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_subps256 ((__v8sf)__A, (__v8sf)__B); + return (__m256) ((__v8sf)__A - (__v8sf)__B); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index a2bdf0e..fe6e3f5 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -39,9 +39,13 @@ /* SSE2 */ typedef double __v2df __attribute__ ((__vector_size__ (16))); typedef long long __v2di __attribute__ ((__vector_size__ (16))); +typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16))); typedef int __v4si __attribute__ ((__vector_size__ (16))); +typedef unsigned int __v4su __attribute__ ((__vector_size__ (16))); typedef short __v8hi __attribute__ ((__vector_size__ (16))); +typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16))); typedef char __v16qi 
__attribute__ ((__vector_size__ (16))); +typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16))); /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ @@ -168,13 +172,13 @@ _mm_storeu_pd (double *__P, __m128d __A) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_sd (double *__P, __m128d __A) { - *__P = __builtin_ia32_vec_ext_v2df (__A, 0); + *__P = ((__v2df)__A)[0]; } extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_f64 (__m128d __A) { - return __builtin_ia32_vec_ext_v2df (__A, 0); + return ((__v2df)__A)[0]; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -187,7 +191,7 @@ _mm_storel_pd (double *__P, __m128d __A) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeh_pd (double *__P, __m128d __A) { - *__P = __builtin_ia32_vec_ext_v2df (__A, 1); + *__P = ((__v2df)__A)[1]; } /* Store the lower DPFP value across two words. @@ -222,21 +226,21 @@ _mm_cvtsi128_si32 (__m128i __A) extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi128_si64 (__m128i __A) { - return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0); + return ((__v2di)__A)[0]; } /* Microsoft intrinsic. 
*/ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi128_si64x (__m128i __A) { - return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0); + return ((__v2di)__A)[0]; } #endif extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_pd (__m128d __A, __m128d __B) { - return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B); + return (__m128d) ((__v2df)__A + (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -248,7 +252,7 @@ _mm_add_sd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_pd (__m128d __A, __m128d __B) { - return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B); + return (__m128d) ((__v2df)__A - (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -260,7 +264,7 @@ _mm_sub_sd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_pd (__m128d __A, __m128d __B) { - return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B); + return (__m128d) ((__v2df)__A * (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -272,7 +276,7 @@ _mm_mul_sd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_pd (__m128d __A, __m128d __B) { - return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B); + return (__m128d) ((__v2df)__A / (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -287,7 +291,7 @@ _mm_sqrt_pd (__m128d __A) return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); } -/* Return pair {sqrt (A[0), B[1]}. */ +/* Return pair {sqrt (B[0]), A[1]}. 
*/ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_sd (__m128d __A, __m128d __B) { @@ -715,13 +719,13 @@ _mm_storeu_si128 (__m128i *__P, __m128i __B) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storel_epi64 (__m128i *__P, __m128i __B) { - *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); + *(long long *)__P = ((__v2di)__B)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movepi64_pi64 (__m128i __B) { - return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); + return (__m64) ((__v2di)__B)[0]; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1006,25 +1010,25 @@ _mm_unpacklo_epi64 (__m128i __A, __m128i __B) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi8 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B); + return (__m128i) ((__v16qu)__A + (__v16qu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi16 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B); + return (__m128i) ((__v8hu)__A + (__v8hu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B); + return (__m128i) ((__v4su)__A + (__v4su)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B); + return (__m128i) ((__v2du)__A + (__v2du)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1054,25 +1058,25 @@ _mm_adds_epu16 (__m128i __A, 
__m128i __B) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi8 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B); + return (__m128i) ((__v16qu)__A - (__v16qu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi16 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B); + return (__m128i) ((__v8hu)__A - (__v8hu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B); + return (__m128i) ((__v4su)__A - (__v4su)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B); + return (__m128i) ((__v2du)__A - (__v2du)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1114,7 +1118,7 @@ _mm_mulhi_epi16 (__m128i __A, __m128i __B) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mullo_epi16 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B); + return (__m128i) ((__v8hu)__A * (__v8hu)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1247,7 +1251,7 @@ _mm_srl_epi64 (__m128i __A, __m128i __B) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_and_si128 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B); + return (__m128i) ((__v2du)__A & (__v2du)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1259,67 +1263,67 @@ _mm_andnot_si128 (__m128i __A, 
__m128i __B) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_or_si128 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B); + return (__m128i) ((__v2du)__A | (__v2du)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_xor_si128 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B); + return (__m128i) ((__v2du)__A ^ (__v2du)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi8 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B); + return (__m128i) ((__v16qi)__A == (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi16 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B); + return (__m128i) ((__v8hi)__A == (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B); + return (__m128i) ((__v4si)__A == (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi8 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A); + return (__m128i) ((__v16qi)__A < (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi16 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A); + return (__m128i) ((__v8hi)__A < (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pcmpgtd128 
((__v4si)__B, (__v4si)__A); + return (__m128i) ((__v4si)__A < (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi8 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B); + return (__m128i) ((__v16qi)__A > (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi16 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B); + return (__m128i) ((__v8hi)__A > (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B); + return (__m128i) ((__v4si)__A > (__v4si)__B); } #ifdef __OPTIMIZE__ diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index 886ace4..32f7f57 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -267,7 +267,7 @@ _mm_dp_pd (__m128d __X, __m128d __Y, const int __M) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi64 (__m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y); + return (__m128i) ((__v2di)__X == (__v2di)__Y); } /* Min/max packed integer instructions. 
*/ @@ -325,7 +325,7 @@ _mm_max_epu32 (__m128i __X, __m128i __Y) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mullo_epi32 (__m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y); + return (__m128i) ((__v4su)__X * (__v4su)__Y); } /* Packed integer 32-bit multiplication of 2 pairs of operands @@ -795,7 +795,7 @@ _mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi64 (__m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y); + return (__m128i) ((__v2di)__X > (__v2di)__Y); } #ifdef __DISABLE_SSE4_2__ diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index ec57234..69c2cac 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -180,25 +180,25 @@ _mm_max_ss (__m128 __A, __m128 __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_ps (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B); + return (__m128) ((__v4sf)__A + (__v4sf)__B); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_ps (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B); + return (__m128) ((__v4sf)__A - (__v4sf)__B); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_ps (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B); + return (__m128) ((__v4sf)__A * (__v4sf)__B); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_ps (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B); + return (__m128) ((__v4sf)__A / (__v4sf)__B); } extern __inline __m128 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -957,13 +957,13 @@ _mm_setr_ps (float __Z, float __Y, float __X, float __W) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_ss (float *__P, __m128 __A) { - *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0); + *__P = ((__v4sf)__A)[0]; } extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_f32 (__m128 __A) { - return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0); + return ((__v4sf)__A)[0]; } /* Store four SPFP values. The address must be 16-byte aligned. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d75ee69..a0fe1a4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2014-11-15 Marc Glisse + + * gcc.target/i386/intrinsics_opt-1.c: New testcase. + * gcc.target/i386/intrinsics_opt-2.c: Likewise. + * gcc.target/i386/intrinsics_opt-3.c: Likewise. + * gcc.target/i386/intrinsics_opt-4.c: Likewise. + 2014-11-15 Francois-Xavier Coudert * gcc.dg/tree-ssa/pr61144.c: Add dg-require-alias. 
diff --git a/gcc/testsuite/gcc.target/i386/intrinsics_opt-1.c b/gcc/testsuite/gcc.target/i386/intrinsics_opt-1.c new file mode 100644 index 0000000..a75bf4e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/intrinsics_opt-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mfma" } */ + +#include <emmintrin.h> + +__m128d myfma(__m128d x, __m128d y, __m128d z){ + __m128d m = _mm_mul_pd (x, y); + return _mm_add_pd (m, z); +} + +/* { dg-final { scan-assembler "vfmadd" } } */ diff --git a/gcc/testsuite/gcc.target/i386/intrinsics_opt-2.c b/gcc/testsuite/gcc.target/i386/intrinsics_opt-2.c new file mode 100644 index 0000000..56be74b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/intrinsics_opt-2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -ffast-math -msse2 -fdump-tree-optimized" } */ + +#include <emmintrin.h> + +int f(__m128d x){ + x = _mm_sub_pd (x, x); + x = _mm_mul_pd (x, x); + double r = 42; + _mm_storeh_pd (&r, x); + int z = r == 0; + return __builtin_constant_p (z) && z; +} + +/* { dg-final { scan-tree-dump "return 1;" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/i386/intrinsics_opt-3.c b/gcc/testsuite/gcc.target/i386/intrinsics_opt-3.c new file mode 100644 index 0000000..f2b3161 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/intrinsics_opt-3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O -msse2" } */ + +#include <emmintrin.h> + +double f(){ + __m128d x = _mm_set1_pd (0.); + double r = 42; + _mm_storeh_pd (&r, x); + return r; +} + +/* { dg-final { scan-assembler-not "unpckhpd" } } */ diff --git a/gcc/testsuite/gcc.target/i386/intrinsics_opt-4.c b/gcc/testsuite/gcc.target/i386/intrinsics_opt-4.c new file mode 100644 index 0000000..30a3cb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/intrinsics_opt-4.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O -ffast-math -msse2" } */ + +#include <emmintrin.h> + +__m128d f(__m128d x, __m128d y, __m128d z){ + y = _mm_add_pd (x, y); + y =
_mm_add_pd (z, y); + return _mm_sub_pd (y, x); +} + +/* { dg-final { scan-assembler-not "subpd" } } */ -- 2.7.4