From 32fe15ec71951cb18be7d3f90019147b55b8ee76 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 21 May 2007 17:40:01 +0200 Subject: [PATCH] tmmintrin.h (_mm_alignr_epi8): Provide macro implementation if __OPTIMIZE__ is not defined. * config/i386/tmmintrin.h (_mm_alignr_epi8): Provide macro implementation if __OPTIMIZE__ is not defined. (_mm_alignr_pi8): Ditto. * config/i386/ammintrin.h (_mm_extracti_si64): Ditto. (_mm_inserti_si64): Ditto. * config/i386/emmintrin.h (_mm_shuffle_pd): Ditto. (_mm_slli_epi16): Ditto. (_mm_slli_epi32): Ditto. (_mm_slli_epi64): Ditto. (_mm_srai_epi16): Ditto. (_mm_srai_epi32): Ditto. (_mm_srli_si128): Ditto. (_mm_slli_si128): Ditto. (_mm_srli_epi16): Ditto. (_mm_srli_epi32): Ditto. (_mm_srli_epi64): Ditto. (_mm_extract_epi16): Ditto. (_mm_insert_epi16): Ditto. (_mm_shufflehi_epi16): Ditto. (_mm_shufflelo_epi16): Ditto. (_mm_shuffle_epi32): Ditto. * config/i386/xmmintrin.h (_mm_extract_pi16): Ditto. (_m_pextrw): Ditto. (_mm_insert_pi16): Ditto. (_m_pinsrw): Ditto. (_mm_shuffle_pi16): Ditto. (_m_pshufw): Ditto. (_mm_shuffle_ps): Ditto. (_mm_prefetch): Ditto. testsuite/ChangeLog: * gcc.target/i386/sse-14.c: Remove all intrinsic redefines. * gcc.target/i386/sse-12.c: Add -O to compile options. * g++.dg/other/i386-2.C: Use "-march=k8 -m3dnow -mssse3 -msse4a" instead of "-msse3". Include only ammintrin.h, tmmintrin.h and mm3dnow.h. Add -O to compile options. 
From-SVN: r124904 --- gcc/ChangeLog | 32 ++++++++++++++++++++ gcc/config/i386/ammintrin.h | 11 ++++++- gcc/config/i386/emmintrin.h | 53 ++++++++++++++++++++++++++++++++++ gcc/config/i386/tmmintrin.h | 7 +++++ gcc/config/i386/xmmintrin.h | 27 +++++++++++++++++ gcc/testsuite/ChangeLog | 8 +++++ gcc/testsuite/g++.dg/other/i386-1.C | 3 +- gcc/testsuite/g++.dg/other/i386-2.C | 14 ++++----- gcc/testsuite/gcc.target/i386/sse-12.c | 4 +-- gcc/testsuite/gcc.target/i386/sse-14.c | 37 ------------------------ 10 files changed, 146 insertions(+), 50 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 32ff87b..f94c125 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,35 @@ +2007-05-21 Uros Bizjak + + * config/i386/tmmintrin.h (_mm_alignr_epi8): Provide macro + implementation if __OPTIMIZE__ is not defined. + (_mm_alignr_pi8): Ditto. + * config/i386/ammintrin.h (_mm_extracti_si64): Ditto. + (_mm_inserti_si64): Ditto. + * config/i386/emmintrin.h (_mm_shuffle_pd): Ditto. + (_mm_slli_epi16): Ditto. + (_mm_slli_epi32): Ditto. + (_mm_slli_epi64): Ditto. + (_mm_srai_epi16): Ditto. + (_mm_srai_epi32): Ditto. + (_mm_srli_si128): Ditto. + (_mm_slli_si128): Ditto. + (_mm_srli_epi16): Ditto. + (_mm_srli_epi32): Ditto. + (_mm_srli_epi64): Ditto. + (_mm_extract_epi16): Ditto. + (_mm_insert_epi16): Ditto. + (_mm_shufflehi_epi16): Ditto. + (_mm_shufflelo_epi16): Ditto. + (_mm_shuffle_epi32): Ditto. + * config/i386/xmmintrin.h (_mm_extract_pi16): Ditto. + (_m_pextrw): Ditto. + (_mm_insert_pi16): Ditto. + (_m_pinsrw): Ditto. + (_mm_shuffle_pi16): Ditto. + (_m_pshufw): Ditto. + (_mm_shuffle_ps): Ditto. + (_mm_prefetch): Ditto. + 2007-05-21 Andreas Krebbel * defaults.h (IBM_FLOAT_FORMAT): Macro definition removed. 
diff --git a/gcc/config/i386/ammintrin.h b/gcc/config/i386/ammintrin.h index 51eaefd..c615b19 100644 --- a/gcc/config/i386/ammintrin.h +++ b/gcc/config/i386/ammintrin.h @@ -55,11 +55,16 @@ _mm_extract_si64 (__m128i __X, __m128i __Y) return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y); } +#ifdef __OPTIMIZE__ static __inline __m128i __attribute__((__always_inline__)) _mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L) { return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L); } +#else +#define _mm_extracti_si64(X, I, L) \ + ((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L)) +#endif static __inline __m128i __attribute__((__always_inline__)) _mm_insert_si64 (__m128i __X,__m128i __Y) @@ -67,12 +72,16 @@ _mm_insert_si64 (__m128i __X,__m128i __Y) return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y); } +#ifdef __OPTIMIZE__ static __inline __m128i __attribute__((__always_inline__)) _mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L) { return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L); } - +#else +#define _mm_inserti_si64(X, Y, I, L) \ + ((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L)) +#endif #endif /* __SSE4A__ */ diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index f878728..a886b71 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -880,11 +880,16 @@ _mm_cvtss_sd (__m128d __A, __m128 __B) return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); } +#ifdef __OPTIMIZE__ static __inline __m128d __attribute__((__always_inline__)) _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) { return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask); } +#else +#define _mm_shuffle_pd(__A, __B, __C) \ + ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C))) +#endif static __inline __m128d __attribute__((__always_inline__)) _mm_unpackhi_pd 
(__m128d __A, __m128d __B) @@ -1108,6 +1113,7 @@ _mm_mul_epu32 (__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B); } +#ifdef __OPTIMIZE__ static __inline __m128i __attribute__((__always_inline__)) _mm_slli_epi16 (__m128i __A, const int __B) { @@ -1125,7 +1131,16 @@ _mm_slli_epi64 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); } +#else +#define _mm_slli_epi16(__A, __B) \ + ((__m128i)__builtin_ia32_psllwi128 ((__v8hi)(__A), __B)) +#define _mm_slli_epi32(__A, __B) \ + ((__m128i)__builtin_ia32_pslldi128 ((__v4si)(__A), __B)) +#define _mm_slli_epi64(__A, __B) \ + ((__m128i)__builtin_ia32_psllqi128 ((__v2di)(__A), __B)) +#endif +#ifdef __OPTIMIZE__ static __inline __m128i __attribute__((__always_inline__)) _mm_srai_epi16 (__m128i __A, const int __B) { @@ -1137,7 +1152,14 @@ _mm_srai_epi32 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); } +#else +#define _mm_srai_epi16(__A, __B) \ + ((__m128i)__builtin_ia32_psrawi128 ((__v8hi)(__A), __B)) +#define _mm_srai_epi32(__A, __B) \ + ((__m128i)__builtin_ia32_psradi128 ((__v4si)(__A), __B)) +#endif +#ifdef __OPTIMIZE__ static __inline __m128i __attribute__((__always_inline__)) _mm_srli_si128 (__m128i __A, const int __B) { @@ -1149,7 +1171,14 @@ _mm_slli_si128 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8); } +#else +#define _mm_srli_si128(__A, __B) \ + ((__m128i)__builtin_ia32_psrldqi128 (__A, (__B) * 8)) +#define _mm_slli_si128(__A, __B) \ + ((__m128i)__builtin_ia32_pslldqi128 (__A, (__B) * 8)) +#endif +#ifdef __OPTIMIZE__ static __inline __m128i __attribute__((__always_inline__)) _mm_srli_epi16 (__m128i __A, const int __B) { @@ -1167,6 +1196,14 @@ _mm_srli_epi64 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); } +#else +#define _mm_srli_epi16(__A, __B) \ + ((__m128i)__builtin_ia32_psrlwi128 ((__v8hi)(__A), __B)) 
+#define _mm_srli_epi32(__A, __B) \ + ((__m128i)__builtin_ia32_psrldi128 ((__v4si)(__A), __B)) +#define _mm_srli_epi64(__A, __B) \ + ((__m128i)__builtin_ia32_psrlqi128 ((__v2di)(__A), __B)) +#endif static __inline __m128i __attribute__((__always_inline__)) _mm_sll_epi16 (__m128i __A, __m128i __B) @@ -1294,6 +1331,7 @@ _mm_cmpgt_epi32 (__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B); } +#ifdef __OPTIMIZE__ static __inline int __attribute__((__always_inline__)) _mm_extract_epi16 (__m128i const __A, int const __N) { @@ -1305,6 +1343,12 @@ _mm_insert_epi16 (__m128i const __A, int const __D, int const __N) { return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N); } +#else +#define _mm_extract_epi16(A, N) \ + ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(A), (N))) +#define _mm_insert_epi16(A, D, N) \ + ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(A), (D), (N))) +#endif static __inline __m128i __attribute__((__always_inline__)) _mm_max_epi16 (__m128i __A, __m128i __B) @@ -1342,6 +1386,7 @@ _mm_mulhi_epu16 (__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); } +#ifdef __OPTIMIZE__ static __inline __m128i __attribute__((__always_inline__)) _mm_shufflehi_epi16 (__m128i __A, const int __mask) { @@ -1359,6 +1404,14 @@ _mm_shuffle_epi32 (__m128i __A, const int __mask) { return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask); } +#else +#define _mm_shufflehi_epi16(__A, __B) \ + ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B)) +#define _mm_shufflelo_epi16(__A, __B) \ + ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B)) +#define _mm_shuffle_epi32(__A, __B) \ + ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B)) +#endif static __inline void __attribute__((__always_inline__)) _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) diff --git a/gcc/config/i386/tmmintrin.h b/gcc/config/i386/tmmintrin.h index dbcfbd0..6d4e290 100644 --- a/gcc/config/i386/tmmintrin.h 
+++ b/gcc/config/i386/tmmintrin.h @@ -181,6 +181,7 @@ _mm_sign_pi32 (__m64 __X, __m64 __Y) return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y); } +#ifdef __OPTIMIZE__ static __inline __m128i __attribute__((__always_inline__)) _mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N) { @@ -191,6 +192,12 @@ _mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N) { return (__m64)__builtin_ia32_palignr ((long long)__X, (long long)__Y, __N * 8); } +#else +#define _mm_alignr_epi8(__X, __Y, __N) \ + ((__m128i)__builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, (__N) * 8)) +#define _mm_alignr_pi8(__X, __Y, __N) \ + ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8)) +#endif static __inline __m128i __attribute__((__always_inline__)) _mm_abs_epi8 (__m128i __X) diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 3716daa..b55474c 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -716,11 +716,16 @@ _mm_cvtps_pi8(__m128 __A) } /* Selects four specific SPFP values from A and B based on MASK. */ +#ifdef __OPTIMIZE__ static __inline __m128 __attribute__((__always_inline__)) _mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask) { return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask); } +#else +#define _mm_shuffle_ps(A, B, MASK) \ + ((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), (MASK))) +#endif /* Selects and interleaves the upper two SPFP values from A and B. */ static __inline __m128 __attribute__((__always_inline__)) @@ -986,6 +991,7 @@ _mm_move_ss (__m128 __A, __m128 __B) } /* Extracts one of the four words of A. The selector N must be immediate. 
*/ +#ifdef __OPTIMIZE__ static __inline int __attribute__((__always_inline__)) _mm_extract_pi16 (__m64 const __A, int const __N) { @@ -997,9 +1003,14 @@ _m_pextrw (__m64 const __A, int const __N) { return _mm_extract_pi16 (__A, __N); } +#else +#define _mm_extract_pi16(A, N) __builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N)) +#define _m_pextrw(A, N) _mm_extract_pi16((A), (N)) +#endif /* Inserts word D into one of four words of A. The selector N must be immediate. */ +#ifdef __OPTIMIZE__ static __inline __m64 __attribute__((__always_inline__)) _mm_insert_pi16 (__m64 const __A, int const __D, int const __N) { @@ -1011,6 +1022,11 @@ _m_pinsrw (__m64 const __A, int const __D, int const __N) { return _mm_insert_pi16 (__A, __D, __N); } +#else +#define _mm_insert_pi16(A, D, N) \ + ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N))) +#define _m_pinsrw(A, D, N) _mm_insert_pi16((A), (D), (N)) +#endif /* Compute the element-wise maximum of signed 16-bit values. */ static __inline __m64 __attribute__((__always_inline__)) @@ -1093,6 +1109,7 @@ _m_pmulhuw (__m64 __A, __m64 __B) /* Return a combination of the four 16-bit values in A. The selector must be an immediate. */ +#ifdef __OPTIMIZE__ static __inline __m64 __attribute__((__always_inline__)) _mm_shuffle_pi16 (__m64 __A, int const __N) { @@ -1104,6 +1121,11 @@ _m_pshufw (__m64 __A, int const __N) { return _mm_shuffle_pi16 (__A, __N); } +#else +#define _mm_shuffle_pi16(A, N) \ + ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N))) +#define _m_pshufw(A, N) _mm_shuffle_pi16 ((A), (N)) +#endif /* Conditionally store byte elements of A into P. The high bit of each byte in the selector N determines whether the corresponding byte from @@ -1163,11 +1185,16 @@ _m_psadbw (__m64 __A, __m64 __B) /* Loads one cache line from address P to a location "closer" to the processor. The selector I specifies the type of prefetch operation. 
*/ +#ifdef __OPTIMIZE__ static __inline void __attribute__((__always_inline__)) _mm_prefetch (void *__P, enum _mm_hint __I) { __builtin_prefetch (__P, 0, __I); } +#else +#define _mm_prefetch(P, I) \ + __builtin_prefetch ((P), 0, (I)) +#endif /* Stores the data in A to the address P without polluting the caches. */ static __inline void __attribute__((__always_inline__)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 031c15d..9e32a38 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2007-05-21 Uros Bizjak + + * gcc.target/i386/sse-14.c: Remove all intrinsic redefines. + * gcc.target/i386/sse-12: Add -O to compile options. + * g++.dg/other/i386-2.C: Use "-march=k8 -m3dnow -mssse3 + -msse4a" instead of "-msse3". Include only ammintrin.h, tmmintrin.h + and mm3dnow.h. Add -O to compile options. + 2007-05-21 Paul Thomas PR fortran/31867 diff --git a/gcc/testsuite/g++.dg/other/i386-1.C b/gcc/testsuite/g++.dg/other/i386-1.C index 8f03a33..8631617 100644 --- a/gcc/testsuite/g++.dg/other/i386-1.C +++ b/gcc/testsuite/g++.dg/other/i386-1.C @@ -1,6 +1,5 @@ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-march=pentium4" } */ -/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-msse2" } */ #include #include diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index b988f9d..e970923 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,12 +1,10 @@ -/* Test that {,x,e,p}mmintrin.h and mm_malloc.h are - usable with -pedantic-errors. */ +/* Test that {,x,e,p,t,a}mmintrin.h, mm3dnow.h and mm_malloc.h are + usable with -O -pedantic-errors. 
*/ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-pedantic-errors -msse3" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mssse3 -msse4a" } */ -#include -#include -#include -#include -#include +#include +#include +#include int dummy; diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index c0ab478..de96ea8 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -1,7 +1,7 @@ /* Test that {,x,e,p,t,a}mmintrin.h, mm3dnow.h and mm_malloc.h are - usable with -std=c89 -pedantic-errors. */ + usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-std=c89 -pedantic-errors -march=k8 -m3dnow -mssse3 -msse4a" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mssse3 -msse4a" } */ #include #include diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index aca882a..4e06514 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -9,43 +9,6 @@ #define static #define __inline -/* Following intrinsics require immediate arguments. 
*/ - -/* ammintrin.h */ -#define __builtin_ia32_extrqi(X, I, L) __builtin_ia32_extrqi(X, 1, 1) -#define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1) - -/* tmmintrin.h */ -#define __builtin_ia32_palignr128(X, Y, N) __builtin_ia32_palignr128(X, Y, 8) -#define __builtin_ia32_palignr(X, Y, N) __builtin_ia32_palignr(X, Y, 8) - -/* emmintrin.h */ -#define __builtin_ia32_psllwi128(A, B) __builtin_ia32_psllwi128(A, 1) -#define __builtin_ia32_psrlqi128(A, B) __builtin_ia32_psrlqi128(A, 1) -#define __builtin_ia32_psrlwi128(A, B) __builtin_ia32_psrlwi128(A, 1) -#define __builtin_ia32_psrldi128(A, B) __builtin_ia32_psrldi128(A, 1) -#define __builtin_ia32_psrldqi128(A, B) __builtin_ia32_psrldqi128(A, 8) -#define __builtin_ia32_pslldqi128(A, B) __builtin_ia32_pslldqi128(A, 8) -#define __builtin_ia32_psrawi128(A, B) __builtin_ia32_psrawi128(A, 1) -#define __builtin_ia32_psradi128(A, B) __builtin_ia32_psradi128(A, 1) -#define __builtin_ia32_psllqi128(A, B) __builtin_ia32_psllqi128(A, 1) -#define __builtin_ia32_pslldi128(A, B) __builtin_ia32_pslldi128(A, 1) -#define __builtin_ia32_pshufhw(A, N) __builtin_ia32_pshufhw(A, 0) -#define __builtin_ia32_pshuflw(A, N) __builtin_ia32_pshuflw(A, 0) -#define __builtin_ia32_pshufd(A, N) __builtin_ia32_pshufd(A, 0) -#define __builtin_ia32_vec_set_v8hi(A, D, N) \ - __builtin_ia32_vec_set_v8hi(A, D, 0) -#define __builtin_ia32_vec_ext_v8hi(A, N) __builtin_ia32_vec_ext_v8hi(A, 0) -#define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0) - -/* xmmintrin.h */ -#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, A, _MM_HINT_NTA) -#define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0) -#define __builtin_ia32_vec_set_v4hi(A, D, N) \ - __builtin_ia32_vec_set_v4hi(A, D, 0) -#define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0) -#define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0) - #include #include -- 2.7.4