From fc9cf6da84a899334fa3cdd50e62d780b2a90c4a Mon Sep 17 00:00:00 2001
From: "H.J. Lu"
Date: Tue, 19 Apr 2016 14:33:36 +0000
Subject: [PATCH] Remove UNSPEC_LOADU and UNSPEC_STOREU

Since *mov_internal and _(load|store)_mask patterns can handle unaligned
load and store, we can remove UNSPEC_LOADU and UNSPEC_STOREU.  We use
function prototypes with a pointer to scalar type for the unaligned
load/store builtin functions so that the memory operand passed to
*mov_internal may be unaligned.  (A short illustrative sketch of this
convention follows the new ChangeLog entry below.)

gcc/

	PR target/69201
	* config/i386/avx512bwintrin.h (_mm512_mask_loadu_epi16): Pass const short * to __builtin_ia32_loaddquhi512_mask.
	(_mm512_maskz_loadu_epi16): Likewise.
	(_mm512_mask_storeu_epi16): Pass short * to __builtin_ia32_storedquhi512_mask.
	(_mm512_mask_loadu_epi8): Pass const char * to __builtin_ia32_loaddquqi512_mask.
	(_mm512_maskz_loadu_epi8): Likewise.
	(_mm512_mask_storeu_epi8): Pass char * to __builtin_ia32_storedquqi512_mask.
	* config/i386/avx512fintrin.h (_mm512_loadu_pd): Pass const double * to __builtin_ia32_loadupd512_mask.
	(_mm512_mask_loadu_pd): Likewise.
	(_mm512_maskz_loadu_pd): Likewise.
	(_mm512_storeu_pd): Pass double * to __builtin_ia32_storeupd512_mask.
	(_mm512_mask_storeu_pd): Likewise.
	(_mm512_loadu_ps): Pass const float * to __builtin_ia32_loadups512_mask.
	(_mm512_mask_loadu_ps): Likewise.
	(_mm512_maskz_loadu_ps): Likewise.
	(_mm512_storeu_ps): Pass float * to __builtin_ia32_storeups512_mask.
	(_mm512_mask_storeu_ps): Likewise.
	(_mm512_mask_loadu_epi64): Pass const long long * to __builtin_ia32_loaddqudi512_mask.
	(_mm512_maskz_loadu_epi64): Likewise.
	(_mm512_mask_storeu_epi64): Pass long long * to __builtin_ia32_storedqudi512_mask.
	(_mm512_loadu_si512): Pass const int * to __builtin_ia32_loaddqusi512_mask.
	(_mm512_mask_loadu_epi32): Likewise.
	(_mm512_maskz_loadu_epi32): Likewise.
	(_mm512_storeu_si512): Pass int * to __builtin_ia32_storedqusi512_mask.
	(_mm512_mask_storeu_epi32): Likewise.
	* config/i386/avx512vlbwintrin.h (_mm256_mask_storeu_epi8): Pass char * to __builtin_ia32_storedquqi256_mask.
	(_mm_mask_storeu_epi8): Likewise.
	(_mm256_mask_loadu_epi16): Pass const short * to __builtin_ia32_loaddquhi256_mask.
	(_mm256_maskz_loadu_epi16): Likewise.
	(_mm_mask_loadu_epi16): Pass const short * to __builtin_ia32_loaddquhi128_mask.
	(_mm_maskz_loadu_epi16): Likewise.
	(_mm256_mask_loadu_epi8): Pass const char * to __builtin_ia32_loaddquqi256_mask.
	(_mm256_maskz_loadu_epi8): Likewise.
	(_mm_mask_loadu_epi8): Pass const char * to __builtin_ia32_loaddquqi128_mask.
	(_mm_maskz_loadu_epi8): Likewise.
	(_mm256_mask_storeu_epi16): Pass short * to __builtin_ia32_storedquhi256_mask.
	(_mm_mask_storeu_epi16): Pass short * to __builtin_ia32_storedquhi128_mask.
	* config/i386/avx512vlintrin.h (_mm256_mask_loadu_pd): Pass const double * to __builtin_ia32_loadupd256_mask.
	(_mm256_maskz_loadu_pd): Likewise.
	(_mm_mask_loadu_pd): Pass const double * to __builtin_ia32_loadupd128_mask.
	(_mm_maskz_loadu_pd): Likewise.
	(_mm256_mask_storeu_pd): Pass double * to __builtin_ia32_storeupd256_mask.
	(_mm_mask_storeu_pd): Pass double * to __builtin_ia32_storeupd128_mask.
	(_mm256_mask_loadu_ps): Pass const float * to __builtin_ia32_loadups256_mask.
	(_mm256_maskz_loadu_ps): Likewise.
	(_mm_mask_loadu_ps): Pass const float * to __builtin_ia32_loadups128_mask.
	(_mm_maskz_loadu_ps): Likewise.
	(_mm256_mask_storeu_ps): Pass float * to __builtin_ia32_storeups256_mask.
	(_mm_mask_storeu_ps): Pass float * to __builtin_ia32_storeups128_mask.
	(_mm256_mask_loadu_epi64): Pass const long long * to __builtin_ia32_loaddqudi256_mask.
	(_mm256_maskz_loadu_epi64): Likewise.
	(_mm_mask_loadu_epi64): Pass const long long * to __builtin_ia32_loaddqudi128_mask.
	(_mm_maskz_loadu_epi64): Likewise.
	(_mm256_mask_storeu_epi64): Pass long long * to __builtin_ia32_storedqudi256_mask.
	(_mm_mask_storeu_epi64): Pass long long * to __builtin_ia32_storedqudi128_mask.
	(_mm256_mask_loadu_epi32): Pass const int * to __builtin_ia32_loaddqusi256_mask.
	(_mm256_maskz_loadu_epi32): Likewise.
	(_mm_mask_loadu_epi32): Pass const int * to __builtin_ia32_loaddqusi128_mask.
	(_mm_maskz_loadu_epi32): Likewise.
	(_mm256_mask_storeu_epi32): Pass int * to __builtin_ia32_storedqusi256_mask.
	(_mm_mask_storeu_epi32): Pass int * to __builtin_ia32_storedqusi128_mask.
	* config/i386/i386-builtin-types.def (PCSHORT): New.
	(PINT64): Likewise.
	(V64QI_FTYPE_PCCHAR_V64QI_UDI): Likewise.
	(V32HI_FTYPE_PCSHORT_V32HI_USI): Likewise.
	(V32QI_FTYPE_PCCHAR_V32QI_USI): Likewise.
	(V16SF_FTYPE_PCFLOAT_V16SF_UHI): Likewise.
	(V8DF_FTYPE_PCDOUBLE_V8DF_UQI): Likewise.
	(V16SI_FTYPE_PCINT_V16SI_UHI): Likewise.
	(V16HI_FTYPE_PCSHORT_V16HI_UHI): Likewise.
	(V16QI_FTYPE_PCCHAR_V16QI_UHI): Likewise.
	(V8SF_FTYPE_PCFLOAT_V8SF_UQI): Likewise.
	(V8DI_FTYPE_PCINT64_V8DI_UQI): Likewise.
	(V8SI_FTYPE_PCINT_V8SI_UQI): Likewise.
	(V8HI_FTYPE_PCSHORT_V8HI_UQI): Likewise.
	(V4DF_FTYPE_PCDOUBLE_V4DF_UQI): Likewise.
	(V4SF_FTYPE_PCFLOAT_V4SF_UQI): Likewise.
	(V4DI_FTYPE_PCINT64_V4DI_UQI): Likewise.
	(V4SI_FTYPE_PCINT_V4SI_UQI): Likewise.
	(V2DF_FTYPE_PCDOUBLE_V2DF_UQI): Likewise.
	(V2DI_FTYPE_PCINT64_V2DI_UQI): Likewise.
	(VOID_FTYPE_PDOUBLE_V8DF_UQI): Likewise.
	(VOID_FTYPE_PDOUBLE_V4DF_UQI): Likewise.
	(VOID_FTYPE_PDOUBLE_V2DF_UQI): Likewise.
	(VOID_FTYPE_PFLOAT_V16SF_UHI): Likewise.
	(VOID_FTYPE_PFLOAT_V8SF_UQI): Likewise.
	(VOID_FTYPE_PFLOAT_V4SF_UQI): Likewise.
	(VOID_FTYPE_PINT64_V8DI_UQI): Likewise.
	(VOID_FTYPE_PINT64_V4DI_UQI): Likewise.
	(VOID_FTYPE_PINT64_V2DI_UQI): Likewise.
	(VOID_FTYPE_PINT_V16SI_UHI): Likewise.
	(VOID_FTYPE_PINT_V8SI_UHI): Likewise.
	(VOID_FTYPE_PINT_V4SI_UHI): Likewise.
	(VOID_FTYPE_PSHORT_V32HI_USI): Likewise.
	(VOID_FTYPE_PSHORT_V16HI_UHI): Likewise.
	(VOID_FTYPE_PSHORT_V8HI_UQI): Likewise.
	(VOID_FTYPE_PCHAR_V64QI_UDI): Likewise.
	(VOID_FTYPE_PCHAR_V32QI_USI): Likewise.
	(VOID_FTYPE_PCHAR_V16QI_UHI): Likewise.
	(V64QI_FTYPE_PCV64QI_V64QI_UDI): Removed.
	(V32HI_FTYPE_PCV32HI_V32HI_USI): Likewise.
	(V32QI_FTYPE_PCV32QI_V32QI_USI): Likewise.
	(V16HI_FTYPE_PCV16HI_V16HI_UHI): Likewise.
	(V16QI_FTYPE_PCV16QI_V16QI_UHI): Likewise.
	(V8HI_FTYPE_PCV8HI_V8HI_UQI): Likewise.
	(VOID_FTYPE_PV32HI_V32HI_USI): Likewise.
	(VOID_FTYPE_PV16HI_V16HI_UHI): Likewise.
	(VOID_FTYPE_PV8HI_V8HI_UQI): Likewise.
	(VOID_FTYPE_PV64QI_V64QI_UDI): Likewise.
	(VOID_FTYPE_PV32QI_V32QI_USI): Likewise.
	(VOID_FTYPE_PV16QI_V16QI_UHI): Likewise.
	* config/i386/i386.c (ix86_emit_save_reg_using_mov): Don't use UNSPEC_STOREU.
	(ix86_emit_restore_sse_regs_using_mov): Don't use UNSPEC_LOADU.
	(ix86_avx256_split_vector_move_misalign): Don't use unaligned load nor store.
	(ix86_expand_vector_move_misalign): Likewise.
	(bdesc_special_args): Use CODE_FOR_movvNXY_internal and pointer to scalar function prototype for unaligned load/store builtins.
	(ix86_expand_special_args_builtin): Updated.
	* config/i386/sse.md (UNSPEC_LOADU): Removed.
	(UNSPEC_STOREU): Likewise.
	(VI_ULOADSTORE_BW_AVX512VL): Likewise.
	(VI_ULOADSTORE_F_AVX512VL): Likewise.
	(ssescalarsize): Handle V4TI, V2TI and V1TI.
	(_loadu): Likewise.
	(*_loadu): Likewise.
	(_storeu): Likewise.
	(_storeu_mask): Likewise.
	(_loaddqu): Likewise.
	(*_loaddqu): Likewise.
	(<sse2_avx_avx512f>_storedqu): Likewise.
	(_storedqu_mask): Likewise.
	(*sse4_2_pcmpestr_unaligned): Likewise.
	(*sse4_2_pcmpistr_unaligned): Likewise.
	(*mov_internal): Renamed to ...
	(mov_internal): This.  Remove check of AVX and IAMCU on misaligned operand.  Replace vmovdqu64 with vmovdqu.
	(movsd/movhpd to movupd peephole): Don't use UNSPEC_LOADU.
	(movlpd/movhpd to movupd peephole): Don't use UNSPEC_STOREU.

gcc/testsuite/

	PR target/69201
	* gcc.target/i386/avx256-unaligned-store-1.c (a): Make it extern to force it misaligned.
	(b): Likewise.
	(c): Likewise.
	(d): Likewise.  Check vmovups.*movv8sf_internal/3 instead of avx_storeups256.  Don't check `*' before movv4sf_internal.
	* gcc.target/i386/avx256-unaligned-store-2.c: Check vmovups.*movv32qi_internal/3 instead of avx_storeups256.  Don't check `*' before movv16qi_internal.
	* gcc.target/i386/avx256-unaligned-store-3.c (a): Make it extern to force it misaligned.
	(b): Likewise.
	(c): Likewise.
	(d): Likewise.  Check vmovups.*movv4df_internal/3 instead of avx_storeupd256.  Don't check `*' before movv2df_internal.
	* gcc.target/i386/avx256-unaligned-store-4.c (a): Make it extern to force it misaligned.
	(b): Likewise.
	(c): Likewise.
	(d): Likewise.  Check movv8sf_internal instead of avx_storeups256.  Check movups.*movv4sf_internal/3 instead of avx_storeups256.

From-SVN: r235209
---
 gcc/ChangeLog                                      | 178 ++++
 gcc/config/i386/avx512bwintrin.h                   |  12 +-
 gcc/config/i386/avx512fintrin.h                    |  36 +-
 gcc/config/i386/avx512vlbwintrin.h                 |  24 +-
 gcc/config/i386/avx512vlintrin.h                   |  48 +-
 gcc/config/i386/i386-builtin-types.def             |  50 +-
 gcc/config/i386/i386.c                             | 242 +++++-----
 gcc/config/i386/sse.md                             | 504 +--------------------
 gcc/testsuite/ChangeLog                            |  28 ++
 .../gcc.target/i386/avx256-unaligned-store-1.c     |   6 +-
 .../gcc.target/i386/avx256-unaligned-store-2.c     |   4 +-
 .../gcc.target/i386/avx256-unaligned-store-3.c     |   6 +-
 .../gcc.target/i386/avx256-unaligned-store-4.c     |   7 +-
 13 files changed, 436 insertions(+), 709 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8a4e327..9f45806 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,181 @@
+2016-04-19 H.J. Lu
+
+	PR target/69201
+	* config/i386/avx512bwintrin.h (_mm512_mask_loadu_epi16): Pass
+	const short * to __builtin_ia32_loaddquhi512_mask.
+	(_mm512_maskz_loadu_epi16): Likewise.
+	(_mm512_mask_storeu_epi16): Pass short * to
+	__builtin_ia32_storedquhi512_mask.
+	(_mm512_mask_loadu_epi8): Pass const char * to
+	__builtin_ia32_loaddquqi512_mask.
+	(_mm512_maskz_loadu_epi8): Likewise.
+	(_mm512_mask_storeu_epi8): Pass char * to
+	__builtin_ia32_storedquqi512_mask.
+	* config/i386/avx512fintrin.h (_mm512_loadu_pd): Pass
+	const double * to __builtin_ia32_loadupd512_mask.
+	(_mm512_mask_loadu_pd): Likewise.
+	(_mm512_maskz_loadu_pd): Likewise.
+	(_mm512_storeu_pd): Pass double * to
+	__builtin_ia32_storeupd512_mask.
+	(_mm512_mask_storeu_pd): Likewise.
+	(_mm512_loadu_ps): Pass const float * to
+	__builtin_ia32_loadups512_mask.
+	(_mm512_mask_loadu_ps): Likewise.
+	(_mm512_maskz_loadu_ps): Likewise.
+	(_mm512_storeu_ps): Pass float * to
+	__builtin_ia32_storeups512_mask.
+	(_mm512_mask_storeu_ps): Likewise.
+	(_mm512_mask_loadu_epi64): Pass const long long * to
+	__builtin_ia32_loaddqudi512_mask.
+	(_mm512_maskz_loadu_epi64): Likewise.
+	(_mm512_mask_storeu_epi64): Pass long long *
+	to __builtin_ia32_storedqudi512_mask.
+	(_mm512_loadu_si512): Pass const int * to
+	__builtin_ia32_loaddqusi512_mask.
+	(_mm512_mask_loadu_epi32): Likewise.
+	(_mm512_maskz_loadu_epi32): Likewise.
+	(_mm512_storeu_si512): Pass int * to
+	__builtin_ia32_storedqusi512_mask.
+	(_mm512_mask_storeu_epi32): Likewise.
+	* config/i386/avx512vlbwintrin.h (_mm256_mask_storeu_epi8): Pass
+	char * to __builtin_ia32_storedquqi256_mask.
+	(_mm_mask_storeu_epi8): Likewise.
+	(_mm256_mask_loadu_epi16): Pass const short * to
+	__builtin_ia32_loaddquhi256_mask.
+	(_mm256_maskz_loadu_epi16): Likewise.
+	(_mm_mask_loadu_epi16): Pass const short * to
+	__builtin_ia32_loaddquhi128_mask.
+	(_mm_maskz_loadu_epi16): Likewise.
+	(_mm256_mask_loadu_epi8): Pass const char * to
+	__builtin_ia32_loaddquqi256_mask.
+	(_mm256_maskz_loadu_epi8): Likewise.
+	(_mm_mask_loadu_epi8): Pass const char * to
+	__builtin_ia32_loaddquqi128_mask.
+	(_mm_maskz_loadu_epi8): Likewise.
+	(_mm256_mask_storeu_epi16): Pass short * to
+	__builtin_ia32_storedquhi256_mask.
+	(_mm_mask_storeu_epi16): Pass short * to
+	__builtin_ia32_storedquhi128_mask.
+	* config/i386/avx512vlintrin.h (_mm256_mask_loadu_pd): Pass
+	const double * to __builtin_ia32_loadupd256_mask.
+	(_mm256_maskz_loadu_pd): Likewise.
+	(_mm_mask_loadu_pd): Pass const double * to
+	__builtin_ia32_loadupd128_mask.
+	(_mm_maskz_loadu_pd): Likewise.
+	(_mm256_mask_storeu_pd): Pass double * to
+	__builtin_ia32_storeupd256_mask.
+	(_mm_mask_storeu_pd): Pass double * to
+	__builtin_ia32_storeupd128_mask.
+	(_mm256_mask_loadu_ps): Pass const float * to
+	__builtin_ia32_loadups256_mask.
+	(_mm256_maskz_loadu_ps): Likewise.
+	(_mm_mask_loadu_ps): Pass const float * to
+	__builtin_ia32_loadups128_mask.
+	(_mm_maskz_loadu_ps): Likewise.
+	(_mm256_mask_storeu_ps): Pass float * to
+	__builtin_ia32_storeups256_mask.
+	(_mm_mask_storeu_ps): Pass float * to
+	__builtin_ia32_storeups128_mask.
+	(_mm256_mask_loadu_epi64): Pass const long long * to
+	__builtin_ia32_loaddqudi256_mask.
+	(_mm256_maskz_loadu_epi64): Likewise.
+	(_mm_mask_loadu_epi64): Pass const long long * to
+	__builtin_ia32_loaddqudi128_mask.
+	(_mm_maskz_loadu_epi64): Likewise.
+	(_mm256_mask_storeu_epi64): Pass long long * to
+	__builtin_ia32_storedqudi256_mask.
+	(_mm_mask_storeu_epi64): Pass long long * to
+	__builtin_ia32_storedqudi128_mask.
+	(_mm256_mask_loadu_epi32): Pass const int * to
+	__builtin_ia32_loaddqusi256_mask.
+	(_mm256_maskz_loadu_epi32): Likewise.
+	(_mm_mask_loadu_epi32): Pass const int * to
+	__builtin_ia32_loaddqusi128_mask.
+	(_mm_maskz_loadu_epi32): Likewise.
+	(_mm256_mask_storeu_epi32): Pass int * to
+	__builtin_ia32_storedqusi256_mask.
+	(_mm_mask_storeu_epi32): Pass int * to
+	__builtin_ia32_storedqusi128_mask.
+	* config/i386/i386-builtin-types.def (PCSHORT): New.
+	(PINT64): Likewise.
+	(V64QI_FTYPE_PCCHAR_V64QI_UDI): Likewise.
+	(V32HI_FTYPE_PCSHORT_V32HI_USI): Likewise.
+	(V32QI_FTYPE_PCCHAR_V32QI_USI): Likewise.
+	(V16SF_FTYPE_PCFLOAT_V16SF_UHI): Likewise.
+	(V8DF_FTYPE_PCDOUBLE_V8DF_UQI): Likewise.
+	(V16SI_FTYPE_PCINT_V16SI_UHI): Likewise.
+	(V16HI_FTYPE_PCSHORT_V16HI_UHI): Likewise.
+	(V16QI_FTYPE_PCCHAR_V16QI_UHI): Likewise.
+	(V8SF_FTYPE_PCFLOAT_V8SF_UQI): Likewise.
+	(V8DI_FTYPE_PCINT64_V8DI_UQI): Likewise.
+	(V8SI_FTYPE_PCINT_V8SI_UQI): Likewise.
+	(V8HI_FTYPE_PCSHORT_V8HI_UQI): Likewise.
+	(V4DF_FTYPE_PCDOUBLE_V4DF_UQI): Likewise.
+	(V4SF_FTYPE_PCFLOAT_V4SF_UQI): Likewise.
+	(V4DI_FTYPE_PCINT64_V4DI_UQI): Likewise.
+	(V4SI_FTYPE_PCINT_V4SI_UQI): Likewise.
+	(V2DF_FTYPE_PCDOUBLE_V2DF_UQI): Likewise.
+	(V2DI_FTYPE_PCINT64_V2DI_UQI): Likewise.
+	(VOID_FTYPE_PDOUBLE_V8DF_UQI): Likewise.
+	(VOID_FTYPE_PDOUBLE_V4DF_UQI): Likewise.
+	(VOID_FTYPE_PDOUBLE_V2DF_UQI): Likewise.
+	(VOID_FTYPE_PFLOAT_V16SF_UHI): Likewise.
+	(VOID_FTYPE_PFLOAT_V8SF_UQI): Likewise.
+	(VOID_FTYPE_PFLOAT_V4SF_UQI): Likewise.
+	(VOID_FTYPE_PINT64_V8DI_UQI): Likewise.
+	(VOID_FTYPE_PINT64_V4DI_UQI): Likewise.
+	(VOID_FTYPE_PINT64_V2DI_UQI): Likewise.
+	(VOID_FTYPE_PINT_V16SI_UHI): Likewise.
+	(VOID_FTYPE_PINT_V8SI_UHI): Likewise.
+	(VOID_FTYPE_PINT_V4SI_UHI): Likewise.
+	(VOID_FTYPE_PSHORT_V32HI_USI): Likewise.
+	(VOID_FTYPE_PSHORT_V16HI_UHI): Likewise.
+	(VOID_FTYPE_PSHORT_V8HI_UQI): Likewise.
+	(VOID_FTYPE_PCHAR_V64QI_UDI): Likewise.
+	(VOID_FTYPE_PCHAR_V32QI_USI): Likewise.
+	(VOID_FTYPE_PCHAR_V16QI_UHI): Likewise.
+	(V64QI_FTYPE_PCV64QI_V64QI_UDI): Removed.
+	(V32HI_FTYPE_PCV32HI_V32HI_USI): Likewise.
+	(V32QI_FTYPE_PCV32QI_V32QI_USI): Likewise.
+	(V16HI_FTYPE_PCV16HI_V16HI_UHI): Likewise.
+	(V16QI_FTYPE_PCV16QI_V16QI_UHI): Likewise.
+	(V8HI_FTYPE_PCV8HI_V8HI_UQI): Likewise.
+	(VOID_FTYPE_PV32HI_V32HI_USI): Likewise.
+	(VOID_FTYPE_PV16HI_V16HI_UHI): Likewise.
+	(VOID_FTYPE_PV8HI_V8HI_UQI): Likewise.
+	(VOID_FTYPE_PV64QI_V64QI_UDI): Likewise.
+	(VOID_FTYPE_PV32QI_V32QI_USI): Likewise.
+	(VOID_FTYPE_PV16QI_V16QI_UHI): Likewise.
+	* config/i386/i386.c (ix86_emit_save_reg_using_mov): Don't
+	use UNSPEC_STOREU.
+	(ix86_emit_restore_sse_regs_using_mov): Don't use UNSPEC_LOADU.
+	(ix86_avx256_split_vector_move_misalign): Don't use unaligned
+	load nor store.
+	(ix86_expand_vector_move_misalign): Likewise.
+	(bdesc_special_args): Use CODE_FOR_movvNXY_internal and pointer
+	to scalar function prototype for unaligned load/store builtins.
+	(ix86_expand_special_args_builtin): Updated.
+	* config/i386/sse.md (UNSPEC_LOADU): Removed.
+	(UNSPEC_STOREU): Likewise.
+	(VI_ULOADSTORE_BW_AVX512VL): Likewise.
+	(VI_ULOADSTORE_F_AVX512VL): Likewise.
+	(ssescalarsize): Handle V4TI, V2TI and V1TI.
+	(_loadu): Likewise.
+	(*_loadu): Likewise.
+	(_storeu): Likewise.
+	(_storeu_mask): Likewise.
+	(_loaddqu): Likewise.
+	(*_loaddqu): Likewise.
+	(<sse2_avx_avx512f>_storedqu): Likewise.
+	(_storedqu_mask): Likewise.
+	(*sse4_2_pcmpestr_unaligned): Likewise.
+	(*sse4_2_pcmpistr_unaligned): Likewise.
+	(*mov_internal): Renamed to ...
+	(mov_internal): This.  Remove check of AVX and IAMCU on
+	misaligned operand.  Replace vmovdqu64 with vmovdqu.
+	(movsd/movhpd to movupd peephole): Don't use UNSPEC_LOADU.
+	(movlpd/movhpd to movupd peephole): Don't use UNSPEC_STOREU.
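
As a side note for readers of the commit message above: the user-visible
intrinsic interface does not change, only the pointer type handed to the
mask builtin does.  The following is a minimal illustrative sketch (not
part of the patch; the file, function and variable names are invented,
and it assumes compilation with -mavx512f) of how the reworked unaligned
load/store intrinsics behave:

/* unaligned-sketch.c -- illustrative only, not part of this patch.  */
#include <immintrin.h>

static double buf[9];	/* &buf[1] is 8-byte but not 64-byte aligned.  */

__m512d
load_slice (void)
{
  /* _mm512_loadu_pd still takes 'void const *'; after this patch it
     casts to 'const double *' before calling
     __builtin_ia32_loadupd512_mask, so the memory operand carries only
     scalar alignment and the plain vector move pattern emits an
     unaligned load without any UNSPEC_LOADU wrapper.  */
  return _mm512_loadu_pd ((void const *) &buf[1]);
}

void
store_slice (__m512d x)
{
  /* Likewise for the store side: no UNSPEC_STOREU is involved.  */
  _mm512_storeu_pd ((void *) &buf[1], x);
}

On the backend side the matching simplification is that the expanders now
emit a plain SET (emit_insn (gen_rtx_SET (op0, op1))) and let the
mov_internal pattern choose between aligned and unaligned instructions,
as the i386.c hunks below show.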
+ 2016-04-19 Richard Biener PR tree-optimization/70171 diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index f40a7d9..e1dafba 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -87,7 +87,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, + return (__m512i) __builtin_ia32_loaddquhi512_mask ((const short *) __P, (__v32hi) __W, (__mmask32) __U); } @@ -96,7 +96,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, + return (__m512i) __builtin_ia32_loaddquhi512_mask ((const short *) __P, (__v32hi) _mm512_setzero_hi (), (__mmask32) __U); @@ -106,7 +106,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) { - __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, + __builtin_ia32_storedquhi512_mask ((short *) __P, (__v32hi) __A, (__mmask32) __U); } @@ -150,7 +150,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, + return (__m512i) __builtin_ia32_loaddquqi512_mask ((const char *) __P, (__v64qi) __W, (__mmask64) __U); } @@ -159,7 +159,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, + return (__m512i) __builtin_ia32_loaddquqi512_mask ((const char *) __P, (__v64qi) _mm512_setzero_hi (), (__mmask64) __U); @@ -169,7 +169,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) { - __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, + __builtin_ia32_storedquqi512_mask ((char *) __P, (__v64qi) __A, (__mmask64) __U); } diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index e009d8c..2f51be9 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -5671,7 +5671,7 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_loadu_pd (void const *__P) { - return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, + return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, (__v8df) _mm512_undefined_pd (), (__mmask8) -1); @@ -5681,7 +5681,7 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) { - return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, + return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, (__v8df) __W, (__mmask8) __U); } @@ -5690,7 +5690,7 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P) { - return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, + return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, (__v8df) 
_mm512_setzero_pd (), (__mmask8) __U); @@ -5700,7 +5700,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_storeu_pd (void *__P, __m512d __A) { - __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A, + __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A, (__mmask8) -1); } @@ -5708,7 +5708,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A) { - __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A, + __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A, (__mmask8) __U); } @@ -5716,7 +5716,7 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_loadu_ps (void const *__P) { - return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, + return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, (__v16sf) _mm512_undefined_ps (), (__mmask16) -1); @@ -5726,7 +5726,7 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) { - return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, + return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, (__v16sf) __W, (__mmask16) __U); } @@ -5735,7 +5735,7 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P) { - return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, + return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U); @@ -5745,7 +5745,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_storeu_ps (void *__P, __m512 __A) { - __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A, + __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A, (__mmask16) -1); } @@ -5753,7 +5753,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A) { - __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A, + __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A, (__mmask16) __U); } @@ -5761,7 +5761,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P, + return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, (__v8di) __W, (__mmask8) __U); } @@ -5770,7 +5770,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P, + return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, (__v8di) _mm512_setzero_si512 (), (__mmask8) __U); @@ -5780,7 +5780,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A) { - __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A, + __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A, (__mmask8) __U); } @@ -5788,7 +5788,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) _mm512_loadu_si512 (void const *__P) { - return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, + return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, (__v16si) _mm512_setzero_si512 (), (__mmask16) -1); @@ -5798,7 +5798,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, + return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, (__v16si) __W, (__mmask16) __U); } @@ -5807,7 +5807,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, + return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U); @@ -5817,7 +5817,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_storeu_si512 (void *__P, __m512i __A) { - __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A, + __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A, (__mmask16) -1); } @@ -5825,7 +5825,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A) { - __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A, + __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A, (__mmask16) __U); } diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index f260526..5f3d51c 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -77,7 +77,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) { - __builtin_ia32_storedquqi256_mask ((__v32qi *) __P, + __builtin_ia32_storedquqi256_mask ((char *) __P, (__v32qi) __A, (__mmask32) __U); } @@ -86,7 +86,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) { - __builtin_ia32_storedquqi128_mask ((__v16qi *) __P, + __builtin_ia32_storedquqi128_mask ((char *) __P, (__v16qi) __A, (__mmask16) __U); } @@ -95,7 +95,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P, + return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P, (__v16hi) __W, (__mmask16) __U); } @@ -104,7 +104,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P, + return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P, (__v16hi) _mm256_setzero_si256 (), (__mmask16) __U); @@ -114,7 +114,7 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, + return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P, (__v8hi) __W, (__mmask8) __U); } @@ 
-123,7 +123,7 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, + return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P, (__v8hi) _mm_setzero_hi (), (__mmask8) __U); @@ -172,7 +172,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, + return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P, (__v32qi) __W, (__mmask32) __U); } @@ -181,7 +181,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, + return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P, (__v32qi) _mm256_setzero_si256 (), (__mmask32) __U); @@ -191,7 +191,7 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P, + return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P, (__v16qi) __W, (__mmask16) __U); } @@ -200,7 +200,7 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P, + return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P, (__v16qi) _mm_setzero_hi (), (__mmask16) __U); @@ -3679,7 +3679,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) { - __builtin_ia32_storedquhi256_mask ((__v16hi *) __P, + __builtin_ia32_storedquhi256_mask ((short *) __P, (__v16hi) __A, (__mmask16) __U); } @@ -3688,7 +3688,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A) { - __builtin_ia32_storedquhi128_mask ((__v8hi *) __P, + __builtin_ia32_storedquhi128_mask ((short *) __P, (__v8hi) __A, (__mmask8) __U); } diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index d0ffb2b..d59bc6c 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -626,7 +626,7 @@ extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) { - return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, + return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P, (__v4df) __W, (__mmask8) __U); } @@ -635,7 +635,7 @@ extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) { - return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, + return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); @@ -645,7 +645,7 @@ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) { - return (__m128d) __builtin_ia32_loadupd128_mask 
((__v2df *) __P, + return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P, (__v2df) __W, (__mmask8) __U); } @@ -654,7 +654,7 @@ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) { - return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, + return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P, (__v2df) _mm_setzero_pd (), (__mmask8) __U); @@ -664,7 +664,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) { - __builtin_ia32_storeupd256_mask ((__v4df *) __P, + __builtin_ia32_storeupd256_mask ((double *) __P, (__v4df) __A, (__mmask8) __U); } @@ -673,7 +673,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) { - __builtin_ia32_storeupd128_mask ((__v2df *) __P, + __builtin_ia32_storeupd128_mask ((double *) __P, (__v2df) __A, (__mmask8) __U); } @@ -682,7 +682,7 @@ extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) { - return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, + return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P, (__v8sf) __W, (__mmask8) __U); } @@ -691,7 +691,7 @@ extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) { - return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, + return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); @@ -701,7 +701,7 @@ extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) { - return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, + return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P, (__v4sf) __W, (__mmask8) __U); } @@ -710,7 +710,7 @@ extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) { - return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, + return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); @@ -720,7 +720,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) { - __builtin_ia32_storeups256_mask ((__v8sf *) __P, + __builtin_ia32_storeups256_mask ((float *) __P, (__v8sf) __A, (__mmask8) __U); } @@ -729,7 +729,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) { - __builtin_ia32_storeups128_mask ((__v4sf *) __P, + __builtin_ia32_storeups128_mask ((float *) __P, (__v4sf) __A, (__mmask8) __U); } @@ -738,7 +738,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, + return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P, (__v4di) __W, (__mmask8) __U); } @@ -747,7 +747,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, + return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U); @@ -757,7 +757,7 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, + return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P, (__v2di) __W, (__mmask8) __U); } @@ -766,7 +766,7 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, + return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P, (__v2di) _mm_setzero_di (), (__mmask8) __U); @@ -776,7 +776,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) { - __builtin_ia32_storedqudi256_mask ((__v4di *) __P, + __builtin_ia32_storedqudi256_mask ((long long *) __P, (__v4di) __A, (__mmask8) __U); } @@ -785,7 +785,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) { - __builtin_ia32_storedqudi128_mask ((__v2di *) __P, + __builtin_ia32_storedqudi128_mask ((long long *) __P, (__v2di) __A, (__mmask8) __U); } @@ -794,7 +794,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, + return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P, (__v8si) __W, (__mmask8) __U); } @@ -803,7 +803,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, + return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U); @@ -813,7 +813,7 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, + return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P, (__v4si) __W, (__mmask8) __U); } @@ -822,7 +822,7 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, + return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); @@ -832,7 +832,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) { - __builtin_ia32_storedqusi256_mask ((__v8si *) __P, + __builtin_ia32_storedqusi256_mask ((int *) __P, (__v8si) __A, (__mmask8) __U); } @@ -841,7 +841,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) { - 
__builtin_ia32_storedqusi128_mask ((__v4si *) __P, + __builtin_ia32_storedqusi128_mask ((int *) __P, (__v4si) __A, (__mmask8) __U); } diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index b892f08..75d57d9 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -124,6 +124,7 @@ DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST) DEF_POINTER_TYPE (PCFLOAT, FLOAT, CONST) DEF_POINTER_TYPE (PCINT, INT, CONST) DEF_POINTER_TYPE (PCINT64, INT64, CONST) +DEF_POINTER_TYPE (PCSHORT, SHORT, CONST) DEF_POINTER_TYPE (PCHAR, CHAR) DEF_POINTER_TYPE (PCVOID, VOID, CONST) DEF_POINTER_TYPE (PVOID, VOID) @@ -132,6 +133,7 @@ DEF_POINTER_TYPE (PFLOAT, FLOAT) DEF_POINTER_TYPE (PSHORT, SHORT) DEF_POINTER_TYPE (PUSHORT, USHORT) DEF_POINTER_TYPE (PINT, INT) +DEF_POINTER_TYPE (PINT64, INT64) DEF_POINTER_TYPE (PLONGLONG, LONGLONG) DEF_POINTER_TYPE (PULONGLONG, ULONGLONG) DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED) @@ -754,24 +756,36 @@ DEF_FUNCTION_TYPE (V16HI, V8HI, V16HI, UHI) DEF_FUNCTION_TYPE (V16HI, HI, V16HI, UHI) DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, UQI) DEF_FUNCTION_TYPE (V8HI, HI, V8HI, UQI) -DEF_FUNCTION_TYPE (V64QI, PCV64QI, V64QI, UDI) -DEF_FUNCTION_TYPE (V32HI, PCV32HI, V32HI, USI) -DEF_FUNCTION_TYPE (V32QI, PCV32QI, V32QI, USI) DEF_FUNCTION_TYPE (V16SF, PCV16SF, V16SF, UHI) DEF_FUNCTION_TYPE (V8DF, PCV8DF, V8DF, UQI) DEF_FUNCTION_TYPE (V16SI, PCV16SI, V16SI, UHI) -DEF_FUNCTION_TYPE (V16HI, PCV16HI, V16HI, UHI) -DEF_FUNCTION_TYPE (V16QI, PCV16QI, V16QI, UHI) DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SF, UQI) DEF_FUNCTION_TYPE (V8DI, PCV8DI, V8DI, UQI) DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI, UQI) -DEF_FUNCTION_TYPE (V8HI, PCV8HI, V8HI, UQI) DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DF, UQI) DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SF, UQI) DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI, UQI) DEF_FUNCTION_TYPE (V4SI, PCV4SI, V4SI, UQI) DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DF, UQI) DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI, UQI) +DEF_FUNCTION_TYPE (V64QI, PCCHAR, V64QI, UDI) +DEF_FUNCTION_TYPE (V32HI, PCSHORT, V32HI, USI) +DEF_FUNCTION_TYPE (V32QI, PCCHAR, V32QI, USI) +DEF_FUNCTION_TYPE (V16SF, PCFLOAT, V16SF, UHI) +DEF_FUNCTION_TYPE (V8DF, PCDOUBLE, V8DF, UQI) +DEF_FUNCTION_TYPE (V16SI, PCINT, V16SI, UHI) +DEF_FUNCTION_TYPE (V16HI, PCSHORT, V16HI, UHI) +DEF_FUNCTION_TYPE (V16QI, PCCHAR, V16QI, UHI) +DEF_FUNCTION_TYPE (V8SF, PCFLOAT, V8SF, UQI) +DEF_FUNCTION_TYPE (V8DI, PCINT64, V8DI, UQI) +DEF_FUNCTION_TYPE (V8SI, PCINT, V8SI, UQI) +DEF_FUNCTION_TYPE (V8HI, PCSHORT, V8HI, UQI) +DEF_FUNCTION_TYPE (V4DF, PCDOUBLE, V4DF, UQI) +DEF_FUNCTION_TYPE (V4SF, PCFLOAT, V4SF, UQI) +DEF_FUNCTION_TYPE (V4DI, PCINT64, V4DI, UQI) +DEF_FUNCTION_TYPE (V4SI, PCINT, V4SI, UQI) +DEF_FUNCTION_TYPE (V2DF, PCDOUBLE, V2DF, UQI) +DEF_FUNCTION_TYPE (V2DI, PCINT64, V2DI, UQI) DEF_FUNCTION_TYPE (V16HI, V16SI, V16HI, UHI) DEF_FUNCTION_TYPE (V8SI, V8DI, V8SI, UQI) DEF_FUNCTION_TYPE (V8HI, V8DI, V8HI, UQI) @@ -823,12 +837,24 @@ DEF_FUNCTION_TYPE (VOID, PV16QI, V4DI, UQI) DEF_FUNCTION_TYPE (VOID, PV16QI, V2DI, UQI) DEF_FUNCTION_TYPE (VOID, PV8SI, V8SI, UQI) DEF_FUNCTION_TYPE (VOID, PV4SI, V4SI, UQI) -DEF_FUNCTION_TYPE (VOID, PV32HI, V32HI, USI) -DEF_FUNCTION_TYPE (VOID, PV16HI, V16HI, UHI) -DEF_FUNCTION_TYPE (VOID, PV8HI, V8HI, UQI) -DEF_FUNCTION_TYPE (VOID, PV64QI, V64QI, UDI) -DEF_FUNCTION_TYPE (VOID, PV32QI, V32QI, USI) -DEF_FUNCTION_TYPE (VOID, PV16QI, V16QI, UHI) +DEF_FUNCTION_TYPE (VOID, PDOUBLE, V8DF, UQI) +DEF_FUNCTION_TYPE (VOID, PDOUBLE, V4DF, UQI) +DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF, 
UQI) +DEF_FUNCTION_TYPE (VOID, PFLOAT, V16SF, UHI) +DEF_FUNCTION_TYPE (VOID, PFLOAT, V8SF, UQI) +DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF, UQI) +DEF_FUNCTION_TYPE (VOID, PINT64, V8DI, UQI) +DEF_FUNCTION_TYPE (VOID, PINT64, V4DI, UQI) +DEF_FUNCTION_TYPE (VOID, PINT64, V2DI, UQI) +DEF_FUNCTION_TYPE (VOID, PINT, V16SI, UHI) +DEF_FUNCTION_TYPE (VOID, PINT, V8SI, UQI) +DEF_FUNCTION_TYPE (VOID, PINT, V4SI, UQI) +DEF_FUNCTION_TYPE (VOID, PSHORT, V32HI, USI) +DEF_FUNCTION_TYPE (VOID, PSHORT, V16HI, UHI) +DEF_FUNCTION_TYPE (VOID, PSHORT, V8HI, UQI) +DEF_FUNCTION_TYPE (VOID, PCHAR, V64QI, UDI) +DEF_FUNCTION_TYPE (VOID, PCHAR, V32QI, USI) +DEF_FUNCTION_TYPE (VOID, PCHAR, V16QI, UHI) DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT, UQI) DEF_FUNCTION_TYPE (V8SI, V8SF, V8SI, UQI) DEF_FUNCTION_TYPE (V4SI, V4SF, V4SI, UQI) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a3ffee9..e491dde 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -11706,7 +11706,6 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, { struct machine_function *m = cfun->machine; rtx reg = gen_rtx_REG (mode, regno); - rtx unspec = NULL_RTX; rtx mem, addr, base, insn; unsigned int align; @@ -11717,13 +11716,7 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY); set_mem_align (mem, align); - /* SSE saves are not within re-aligned local stack frame. - In case INCOMING_STACK_BOUNDARY is misaligned, we have - to emit unaligned store. */ - if (mode == V4SFmode && align < 128) - unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU); - - insn = emit_insn (gen_rtx_SET (mem, unspec ? unspec : reg)); + insn = emit_insn (gen_rtx_SET (mem, reg)); RTX_FRAME_RELATED_P (insn) = 1; base = addr; @@ -11770,8 +11763,6 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, mem = gen_rtx_MEM (mode, addr); add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); } - else if (unspec) - add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); } /* Emit code to save registers using MOV insns. @@ -13323,18 +13314,7 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, /* The location is aligned up to INCOMING_STACK_BOUNDARY. */ align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY); set_mem_align (mem, align); - - /* SSE saves are not within re-aligned local stack frame. - In case INCOMING_STACK_BOUNDARY is misaligned, we have - to emit unaligned load. 
*/ - if (align < 128) - { - rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem), - UNSPEC_LOADU); - emit_insn (gen_rtx_SET (reg, unspec)); - } - else - emit_insn (gen_rtx_SET (reg, mem)); + emit_insn (gen_rtx_SET (reg, mem)); ix86_add_cfa_restore_note (NULL, reg, cfa_offset); @@ -18837,8 +18817,6 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) { rtx m; rtx (*extract) (rtx, rtx, rtx); - rtx (*load_unaligned) (rtx, rtx); - rtx (*store_unaligned) (rtx, rtx); machine_mode mode; switch (GET_MODE (op0)) @@ -18847,20 +18825,14 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) gcc_unreachable (); case V32QImode: extract = gen_avx_vextractf128v32qi; - load_unaligned = gen_avx_loaddquv32qi; - store_unaligned = gen_avx_storedquv32qi; mode = V16QImode; break; case V8SFmode: extract = gen_avx_vextractf128v8sf; - load_unaligned = gen_avx_loadups256; - store_unaligned = gen_avx_storeups256; mode = V4SFmode; break; case V4DFmode: extract = gen_avx_vextractf128v4df; - load_unaligned = gen_avx_loadupd256; - store_unaligned = gen_avx_storeupd256; mode = V2DFmode; break; } @@ -18877,14 +18849,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m); emit_move_insn (op0, r); } - /* Normal *mov_internal pattern will handle - unaligned loads just fine if misaligned_operand - is true, and without the UNSPEC it can be combined - with arithmetic instructions. */ - else if (misaligned_operand (op1, GET_MODE (op1))) - emit_insn (gen_rtx_SET (op0, op1)); else - emit_insn (load_unaligned (op0, op1)); + emit_insn (gen_rtx_SET (op0, op1)); } else if (MEM_P (op0)) { @@ -18897,7 +18863,7 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) emit_insn (extract (m, op1, const1_rtx)); } else - emit_insn (store_unaligned (op0, op1)); + emit_insn (gen_rtx_SET (op0, op1)); } else gcc_unreachable (); @@ -18959,8 +18925,6 @@ void ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) { rtx op0, op1, orig_op0 = NULL_RTX, m; - rtx (*load_unaligned) (rtx, rtx); - rtx (*store_unaligned) (rtx, rtx); op0 = operands[0]; op1 = operands[1]; @@ -18985,30 +18949,8 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) /* FALLTHRU */ case MODE_VECTOR_FLOAT: - switch (GET_MODE (op0)) - { - default: - gcc_unreachable (); - case V16SImode: - load_unaligned = gen_avx512f_loaddquv16si; - store_unaligned = gen_avx512f_storedquv16si; - break; - case V16SFmode: - load_unaligned = gen_avx512f_loadups512; - store_unaligned = gen_avx512f_storeups512; - break; - case V8DFmode: - load_unaligned = gen_avx512f_loadupd512; - store_unaligned = gen_avx512f_storeupd512; - break; - } - if (MEM_P (op1)) - emit_insn (load_unaligned (op0, op1)); - else if (MEM_P (op0)) - emit_insn (store_unaligned (op0, op1)); - else - gcc_unreachable (); + emit_insn (gen_rtx_SET (op0, op1)); if (orig_op0) emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); break; @@ -19076,7 +19018,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) } op1 = gen_lowpart (V16QImode, op1); /* We will eventually emit movups based on insn attributes. */ - emit_insn (gen_sse2_loaddquv16qi (op0, op1)); + emit_insn (gen_rtx_SET (op0, op1)); if (orig_op0) emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); } @@ -19090,7 +19032,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) || optimize_insn_for_size_p ()) { /* We will eventually emit movups based on insn attributes. 
*/ - emit_insn (gen_sse2_loadupd (op0, op1)); + emit_insn (gen_rtx_SET (op0, op1)); return; } @@ -19134,7 +19076,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) op0 = gen_reg_rtx (V4SFmode); } op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_loadups (op0, op1)); + emit_insn (gen_rtx_SET (op0, op1)); if (orig_op0) emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); @@ -19166,7 +19108,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); /* We will eventually emit movups based on insn attributes. */ - emit_insn (gen_sse2_storedquv16qi (op0, op1)); + emit_insn (gen_rtx_SET (op0, op1)); } else if (TARGET_SSE2 && mode == V2DFmode) { @@ -19175,7 +19117,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL || optimize_insn_for_size_p ()) /* We will eventually emit movups based on insn attributes. */ - emit_insn (gen_sse2_storeupd (op0, op1)); + emit_insn (gen_rtx_SET (op0, op1)); else { m = adjust_address (op0, DFmode, 0); @@ -19195,7 +19137,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) || optimize_insn_for_size_p ()) { op0 = gen_lowpart (V4SFmode, op0); - emit_insn (gen_sse_storeups (op0, op1)); + emit_insn (gen_rtx_SET (op0, op1)); } else { @@ -32654,9 +32596,9 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, /* SSE */ - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_movv4sf_internal, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, + { OPTION_MASK_ISA_SSE, CODE_FOR_movv4sf_internal, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, @@ -32670,14 +32612,14 @@ static const struct builtin_description bdesc_special_args[] = /* SSE2 */ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_movv2df_internal, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_movv16qi_internal, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, { OPTION_MASK_ISA_SSE2, 
CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT }, { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_movv2df_internal, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_movv16qi_internal, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, @@ -32702,12 +32644,12 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_movv4df_internal, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, + { OPTION_MASK_ISA_AVX, CODE_FOR_movv8sf_internal, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_movv4df_internal, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_movv8sf_internal, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_movv32qi_internal, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, + { OPTION_MASK_ISA_AVX, CODE_FOR_movv32qi_internal, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI 
}, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI }, @@ -32747,10 +32689,10 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI }, @@ -32759,9 +32701,9 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, 
"__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI }, @@ -32777,7 +32719,7 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI }, @@ -32806,16 +32748,16 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID }, /* AVX512BW */ - { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI }, - { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI }, - { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", 
IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI }, - { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI }, /* AVX512VL */ - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCSHORT_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCSHORT_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCCHAR_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCCHAR_V16QI_UHI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI }, @@ -32832,26 +32774,26 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", 
IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI }, - { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCINT64_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCINT64_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCINT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCINT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PINT64_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PINT64_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PINT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PINT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, 
(int) VOID_FTYPE_PSHORT_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI_UHI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI }, @@ -33983,10 +33925,10 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI }, - { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, @@ -34728,8 +34670,8 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI }, { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT }, { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT }, - { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI }, - { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI }, { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI }, { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI }, { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI }, @@ -39894,12 +39836,24 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case VOID_FTYPE_PV16QI_V2DI_UQI: case VOID_FTYPE_PV16QI_V8SI_UQI: case VOID_FTYPE_PV16QI_V4SI_UQI: - case VOID_FTYPE_PV8HI_V8HI_UQI: - case VOID_FTYPE_PV16HI_V16HI_UHI: - case VOID_FTYPE_PV32HI_V32HI_USI: - case VOID_FTYPE_PV16QI_V16QI_UHI: - case VOID_FTYPE_PV32QI_V32QI_USI: - case VOID_FTYPE_PV64QI_V64QI_UDI: + case VOID_FTYPE_PCHAR_V64QI_UDI: + case VOID_FTYPE_PCHAR_V32QI_USI: + case VOID_FTYPE_PCHAR_V16QI_UHI: + case VOID_FTYPE_PSHORT_V32HI_USI: + case VOID_FTYPE_PSHORT_V16HI_UHI: + case VOID_FTYPE_PSHORT_V8HI_UQI: + case VOID_FTYPE_PINT_V16SI_UHI: + case VOID_FTYPE_PINT_V8SI_UQI: + case VOID_FTYPE_PINT_V4SI_UQI: + case VOID_FTYPE_PINT64_V8DI_UQI: + case VOID_FTYPE_PINT64_V4DI_UQI: + case VOID_FTYPE_PINT64_V2DI_UQI: + case VOID_FTYPE_PDOUBLE_V8DF_UQI: + case VOID_FTYPE_PDOUBLE_V4DF_UQI: + case VOID_FTYPE_PDOUBLE_V2DF_UQI: + case VOID_FTYPE_PFLOAT_V16SF_UHI: + case VOID_FTYPE_PFLOAT_V8SF_UQI: + case VOID_FTYPE_PFLOAT_V4SF_UQI: nargs = 2; klass = store; /* Reserve memory operand for target. 
*/ @@ -39917,15 +39871,6 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case V2DI_FTYPE_PCV2DI_V2DI_UQI: case V4DI_FTYPE_PCV4DI_V4DI_UQI: case V8DI_FTYPE_PCV8DI_V8DI_UQI: - case V8HI_FTYPE_PCV8HI_V8HI_UQI: - case V16HI_FTYPE_PCV16HI_V16HI_UHI: - case V32HI_FTYPE_PCV32HI_V32HI_USI: - case V16QI_FTYPE_PCV16QI_V16QI_UHI: - case V32QI_FTYPE_PCV32QI_V32QI_USI: - case V64QI_FTYPE_PCV64QI_V64QI_UDI: - nargs = 3; - klass = load; - memory = 0; switch (icode) { /* These builtins and instructions require the memory @@ -39953,6 +39898,27 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, default: break; } + case V64QI_FTYPE_PCCHAR_V64QI_UDI: + case V32QI_FTYPE_PCCHAR_V32QI_USI: + case V16QI_FTYPE_PCCHAR_V16QI_UHI: + case V32HI_FTYPE_PCSHORT_V32HI_USI: + case V16HI_FTYPE_PCSHORT_V16HI_UHI: + case V8HI_FTYPE_PCSHORT_V8HI_UQI: + case V16SI_FTYPE_PCINT_V16SI_UHI: + case V8SI_FTYPE_PCINT_V8SI_UQI: + case V4SI_FTYPE_PCINT_V4SI_UQI: + case V8DI_FTYPE_PCINT64_V8DI_UQI: + case V4DI_FTYPE_PCINT64_V4DI_UQI: + case V2DI_FTYPE_PCINT64_V2DI_UQI: + case V8DF_FTYPE_PCDOUBLE_V8DF_UQI: + case V4DF_FTYPE_PCDOUBLE_V4DF_UQI: + case V2DF_FTYPE_PCDOUBLE_V2DF_UQI: + case V16SF_FTYPE_PCFLOAT_V16SF_UHI: + case V8SF_FTYPE_PCFLOAT_V8SF_UQI: + case V4SF_FTYPE_PCFLOAT_V4SF_UQI: + nargs = 3; + klass = load; + memory = 0; break; case VOID_FTYPE_UINT_UINT_UINT: case VOID_FTYPE_UINT64_UINT_UINT: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 17596cf..ed0a1a6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -20,8 +20,6 @@ (define_c_enum "unspec" [ ;; SSE UNSPEC_MOVNT - UNSPEC_LOADU - UNSPEC_STOREU ;; SSE3 UNSPEC_LDDQU @@ -290,14 +288,6 @@ (define_mode_iterator VI1 [(V32QI "TARGET_AVX") V16QI]) -(define_mode_iterator VI_ULOADSTORE_BW_AVX512VL - [V64QI - V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")]) - -(define_mode_iterator VI_ULOADSTORE_F_AVX512VL - [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) - ;; All DImode vector integer modes (define_mode_iterator V_AVX [V16QI V8HI V4SI V2DI V4SF V2DF @@ -730,7 +720,8 @@ (V4SF "3") (V2DF "1")]) (define_mode_attr ssescalarsize - [(V8DI "64") (V4DI "64") (V2DI "64") + [(V4TI "64") (V2TI "64") (V1TI "64") + (V8DI "64") (V4DI "64") (V2DI "64") (V64QI "8") (V32QI "8") (V16QI "8") (V32HI "16") (V16HI "16") (V8HI "16") (V16SI "32") (V8SI "32") (V4SI "32") @@ -841,7 +832,7 @@ DONE; }) -(define_insn "*mov_internal" +(define_insn "mov_internal" [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m") (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))] "TARGET_SSE @@ -902,9 +893,8 @@ case MODE_V16SF: case MODE_V8SF: case MODE_V4SF: - if ((TARGET_AVX || TARGET_IAMCU) - && (misaligned_operand (operands[0], mode) - || misaligned_operand (operands[1], mode))) + if (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode)) return "%vmovups\t{%1, %0|%0, %1}"; else return "%vmovaps\t{%1, %0|%0, %1}"; @@ -912,19 +902,17 @@ case MODE_V8DF: case MODE_V4DF: case MODE_V2DF: - if ((TARGET_AVX || TARGET_IAMCU) - && (misaligned_operand (operands[0], mode) - || misaligned_operand (operands[1], mode))) + if (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode)) return "%vmovupd\t{%1, %0|%0, %1}"; else return "%vmovapd\t{%1, %0|%0, %1}"; case MODE_OI: case MODE_TI: - if ((TARGET_AVX || TARGET_IAMCU) - && (misaligned_operand (operands[0], mode) - || misaligned_operand 
(operands[1], mode))) - return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}" + if (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode)) + return TARGET_AVX512VL ? "vmovdqu\t{%1, %0|%0, %1}" : "%vmovdqu\t{%1, %0|%0, %1}"; else return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}" @@ -932,7 +920,11 @@ case MODE_XI: if (misaligned_operand (operands[0], mode) || misaligned_operand (operands[1], mode)) - return "vmovdqu64\t{%1, %0|%0, %1}"; + return (mode == V16SImode + || mode == V8DImode + || TARGET_AVX512BW) + ? "vmovdqu\t{%1, %0|%0, %1}" + : "vmovdqu64\t{%1, %0|%0, %1}"; else return "vmovdqa64\t{%1, %0|%0, %1}"; @@ -1154,62 +1146,6 @@ DONE; }) -(define_expand "_loadu" - [(set (match_operand:VF 0 "register_operand") - (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")] - UNSPEC_LOADU))] - "TARGET_SSE && " -{ - /* For AVX, normal *mov_internal pattern will handle unaligned loads - just fine if misaligned_operand is true, and without the UNSPEC it can - be combined with arithmetic instructions. If misaligned_operand is - false, still emit UNSPEC_LOADU insn to honor user's request for - misaligned load. */ - if (TARGET_AVX - && misaligned_operand (operands[1], mode)) - { - rtx src = operands[1]; - if () - src = gen_rtx_VEC_MERGE (mode, operands[1], - operands[2 * ], - operands[3 * ]); - emit_insn (gen_rtx_SET (operands[0], src)); - DONE; - } -}) - -(define_insn "*_loadu" - [(set (match_operand:VF 0 "register_operand" "=v") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "vm")] - UNSPEC_LOADU))] - "TARGET_SSE && " -{ - switch (get_attr_mode (insn)) - { - case MODE_V16SF: - case MODE_V8SF: - case MODE_V4SF: - return "%vmovups\t{%1, %0|%0, %1}"; - default: - return "%vmovu\t{%1, %0|%0, %1}"; - } -} - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "ssememalign" "8") - (set_attr "prefix" "maybe_vex") - (set (attr "mode") - (cond [(and (match_test " == 16") - (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) - (const_string "") - (match_test "TARGET_AVX") - (const_string "") - (match_test "optimize_function_for_size_p (cfun)") - (const_string "V4SF") - ] - (const_string "")))]) - ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets. 
(define_peephole2 [(set (match_operand:V2DF 0 "register_operand") @@ -1221,69 +1157,9 @@ (match_operand:DF 3 "memory_operand")))] "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL && ix86_operands_ok_for_move_multiple (operands, true, DFmode)" - [(set (match_dup 2) - (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))] + [(set (match_dup 2) (match_dup 4))] "operands[4] = adjust_address (operands[1], V2DFmode, 0);") -(define_insn "_storeu" - [(set (match_operand:VF 0 "memory_operand" "=m") - (unspec:VF - [(match_operand:VF 1 "register_operand" "v")] - UNSPEC_STOREU))] - "TARGET_SSE" -{ - switch (get_attr_mode (insn)) - { - case MODE_V16SF: - case MODE_V8SF: - case MODE_V4SF: - return "%vmovups\t{%1, %0|%0, %1}"; - default: - return "%vmovu\t{%1, %0|%0, %1}"; - } -} - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "ssememalign" "8") - (set_attr "prefix" "maybe_vex") - (set (attr "mode") - (cond [(and (match_test " == 16") - (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") - (match_test "TARGET_SSE_TYPELESS_STORES"))) - (const_string "") - (match_test "TARGET_AVX") - (const_string "") - (match_test "optimize_function_for_size_p (cfun)") - (const_string "V4SF") - ] - (const_string "")))]) - -(define_insn "_storeu_mask" - [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m") - (vec_merge:VF_AVX512VL - (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "register_operand" "v")] - UNSPEC_STOREU) - (match_dup 0) - (match_operand: 2 "register_operand" "Yk")))] - "TARGET_AVX512F" -{ - switch (get_attr_mode (insn)) - { - case MODE_V16SF: - case MODE_V8SF: - case MODE_V4SF: - return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}"; - default: - return "vmovu\t{%1, %0%{%2%}|%0%{%2%}, %1}"; - } -} - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "memory" "store") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets. (define_peephole2 [(set (match_operand:DF 0 "memory_operand") @@ -1294,238 +1170,9 @@ (parallel [(const_int 1)])))] "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL && ix86_operands_ok_for_move_multiple (operands, false, DFmode)" - [(set (match_dup 4) - (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))] + [(set (match_dup 4) (match_dup 1))] "operands[4] = adjust_address (operands[0], V2DFmode, 0);") -/* For AVX, normal *mov_internal pattern will handle unaligned loads - just fine if misaligned_operand is true, and without the UNSPEC it can - be combined with arithmetic instructions. If misaligned_operand is - false, still emit UNSPEC_LOADU insn to honor user's request for - misaligned load. 
*/ -(define_expand "_loaddqu" - [(set (match_operand:VI1 0 "register_operand") - (unspec:VI1 - [(match_operand:VI1 1 "nonimmediate_operand")] - UNSPEC_LOADU))] - "TARGET_SSE2 && && " -{ - if (TARGET_AVX - && misaligned_operand (operands[1], mode)) - { - rtx src = operands[1]; - if () - src = gen_rtx_VEC_MERGE (mode, operands[1], - operands[2 * ], - operands[3 * ]); - emit_insn (gen_rtx_SET (operands[0], src)); - DONE; - } -}) - -(define_expand "_loaddqu" - [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand") - (unspec:VI_ULOADSTORE_BW_AVX512VL - [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")] - UNSPEC_LOADU))] - "TARGET_AVX512BW" -{ - if (misaligned_operand (operands[1], mode)) - { - rtx src = operands[1]; - if () - src = gen_rtx_VEC_MERGE (mode, operands[1], - operands[2 * ], - operands[3 * ]); - emit_insn (gen_rtx_SET (operands[0], src)); - DONE; - } -}) - -(define_expand "_loaddqu" - [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand") - (unspec:VI_ULOADSTORE_F_AVX512VL - [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")] - UNSPEC_LOADU))] - "TARGET_AVX512F" -{ - if (misaligned_operand (operands[1], mode)) - { - rtx src = operands[1]; - if () - src = gen_rtx_VEC_MERGE (mode, operands[1], - operands[2 * ], - operands[3 * ]); - emit_insn (gen_rtx_SET (operands[0], src)); - DONE; - } -}) - -(define_insn "*_loaddqu" - [(set (match_operand:VI1 0 "register_operand" "=v") - (unspec:VI1 - [(match_operand:VI1 1 "nonimmediate_operand" "vm")] - UNSPEC_LOADU))] - "TARGET_SSE2 && && " -{ - switch (get_attr_mode (insn)) - { - case MODE_V8SF: - case MODE_V4SF: - return "%vmovups\t{%1, %0|%0, %1}"; - default: - if (!(TARGET_AVX512VL && TARGET_AVX512BW)) - return "%vmovdqu\t{%1, %0|%0, %1}"; - else - return "vmovdqu\t{%1, %0|%0, %1}"; - } -} - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "ssememalign" "8") - (set (attr "prefix_data16") - (if_then_else - (match_test "TARGET_AVX") - (const_string "*") - (const_string "1"))) - (set_attr "prefix" "maybe_vex") - (set (attr "mode") - (cond [(and (match_test " == 16") - (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) - (const_string "") - (match_test "TARGET_AVX") - (const_string "") - (match_test "optimize_function_for_size_p (cfun)") - (const_string "V4SF") - ] - (const_string "")))]) - -(define_insn "*_loaddqu" - [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v") - (unspec:VI_ULOADSTORE_BW_AVX512VL - [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")] - UNSPEC_LOADU))] - "TARGET_AVX512BW" - "vmovdqu\t{%1, %0|%0, %1}"; - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "ssememalign" "8") - (set_attr "prefix" "maybe_evex")]) - -(define_insn "*_loaddqu" - [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v") - (unspec:VI_ULOADSTORE_F_AVX512VL - [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")] - UNSPEC_LOADU))] - "TARGET_AVX512F" - "vmovdqu\t{%1, %0|%0, %1}"; - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "ssememalign" "8") - (set_attr "prefix" "maybe_evex")]) - -(define_insn "_storedqu" - [(set (match_operand:VI1 0 "memory_operand" "=m") - (unspec:VI1 - [(match_operand:VI1 1 "register_operand" "v")] - UNSPEC_STOREU))] - "TARGET_SSE2" -{ - switch (get_attr_mode (insn)) - { - case MODE_V16SF: - case MODE_V8SF: - case MODE_V4SF: - return "%vmovups\t{%1, %0|%0, %1}"; - default: - switch (mode) - { - case V32QImode: - case V16QImode: - if 
(!(TARGET_AVX512VL && TARGET_AVX512BW)) - return "%vmovdqu\t{%1, %0|%0, %1}"; - default: - return "vmovdqu\t{%1, %0|%0, %1}"; - } - } -} - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "ssememalign" "8") - (set (attr "prefix_data16") - (if_then_else - (match_test "TARGET_AVX") - (const_string "*") - (const_string "1"))) - (set_attr "prefix" "maybe_vex") - (set (attr "mode") - (cond [(and (match_test " == 16") - (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") - (match_test "TARGET_SSE_TYPELESS_STORES"))) - (const_string "") - (match_test "TARGET_AVX") - (const_string "") - (match_test "optimize_function_for_size_p (cfun)") - (const_string "V4SF") - ] - (const_string "")))]) - -(define_insn "_storedqu" - [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m") - (unspec:VI_ULOADSTORE_BW_AVX512VL - [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")] - UNSPEC_STOREU))] - "TARGET_AVX512BW" - "vmovdqu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "ssememalign" "8") - (set_attr "prefix" "maybe_evex")]) - -(define_insn "_storedqu" - [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m") - (unspec:VI_ULOADSTORE_F_AVX512VL - [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")] - UNSPEC_STOREU))] - "TARGET_AVX512F" - "vmovdqu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "ssememalign" "8") - (set_attr "prefix" "maybe_vex")]) - -(define_insn "_storedqu_mask" - [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m") - (vec_merge:VI48_AVX512VL - (unspec:VI48_AVX512VL - [(match_operand:VI48_AVX512VL 1 "register_operand" "v")] - UNSPEC_STOREU) - (match_dup 0) - (match_operand: 2 "register_operand" "Yk")))] - "TARGET_AVX512F" - "vmovdqu\t{%1, %0%{%2%}|%0%{%2%}, %1}" - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "memory" "store") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - -(define_insn "_storedqu_mask" - [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m") - (vec_merge:VI12_AVX512VL - (unspec:VI12_AVX512VL - [(match_operand:VI12_AVX512VL 1 "register_operand" "v")] - UNSPEC_STOREU) - (match_dup 0) - (match_operand: 2 "register_operand" "Yk")))] - "TARGET_AVX512BW" - "vmovdqu\t{%1, %0%{%2%}|%0%{%2%}, %1}" - [(set_attr "type" "ssemov") - (set_attr "movu" "1") - (set_attr "memory" "store") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - (define_insn "_lddqu" [(set (match_operand:VI1 0 "register_operand" "=x") (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] @@ -15406,69 +15053,6 @@ (set_attr "memory" "none,load") (set_attr "mode" "TI")]) -(define_insn_and_split "*sse4_2_pcmpestr_unaligned" - [(set (match_operand:SI 0 "register_operand" "=c") - (unspec:SI - [(match_operand:V16QI 2 "register_operand" "x") - (match_operand:SI 3 "register_operand" "a") - (unspec:V16QI - [(match_operand:V16QI 4 "memory_operand" "m")] - UNSPEC_LOADU) - (match_operand:SI 5 "register_operand" "d") - (match_operand:SI 6 "const_0_to_255_operand" "n")] - UNSPEC_PCMPESTR)) - (set (match_operand:V16QI 1 "register_operand" "=Yz") - (unspec:V16QI - [(match_dup 2) - (match_dup 3) - (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU) - (match_dup 5) - (match_dup 6)] - UNSPEC_PCMPESTR)) - (set (reg:CC FLAGS_REG) - (unspec:CC - [(match_dup 2) - (match_dup 3) - (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU) - (match_dup 5) - (match_dup 6)] - UNSPEC_PCMPESTR))] - "TARGET_SSE4_2 - && can_create_pseudo_p ()" - "#" - "&& 1" - [(const_int 
0)] -{ - int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); - int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); - int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); - - if (ecx) - emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2], - operands[3], operands[4], - operands[5], operands[6])); - if (xmm0) - emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2], - operands[3], operands[4], - operands[5], operands[6])); - if (flags && !(ecx || xmm0)) - emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL, - operands[2], operands[3], - operands[4], operands[5], - operands[6])); - if (!(flags || ecx || xmm0)) - emit_note (NOTE_INSN_DELETED); - - DONE; -} - [(set_attr "type" "sselog") - (set_attr "prefix_data16" "1") - (set_attr "prefix_extra" "1") - (set_attr "ssememalign" "8") - (set_attr "length_immediate" "1") - (set_attr "memory" "load") - (set_attr "mode" "TI")]) - (define_insn "sse4_2_pcmpestri" [(set (match_operand:SI 0 "register_operand" "=c,c") (unspec:SI @@ -15606,60 +15190,6 @@ (set_attr "memory" "none,load") (set_attr "mode" "TI")]) -(define_insn_and_split "*sse4_2_pcmpistr_unaligned" - [(set (match_operand:SI 0 "register_operand" "=c") - (unspec:SI - [(match_operand:V16QI 2 "register_operand" "x") - (unspec:V16QI - [(match_operand:V16QI 3 "memory_operand" "m")] - UNSPEC_LOADU) - (match_operand:SI 4 "const_0_to_255_operand" "n")] - UNSPEC_PCMPISTR)) - (set (match_operand:V16QI 1 "register_operand" "=Yz") - (unspec:V16QI - [(match_dup 2) - (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU) - (match_dup 4)] - UNSPEC_PCMPISTR)) - (set (reg:CC FLAGS_REG) - (unspec:CC - [(match_dup 2) - (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU) - (match_dup 4)] - UNSPEC_PCMPISTR))] - "TARGET_SSE4_2 - && can_create_pseudo_p ()" - "#" - "&& 1" - [(const_int 0)] -{ - int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); - int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); - int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); - - if (ecx) - emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2], - operands[3], operands[4])); - if (xmm0) - emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2], - operands[3], operands[4])); - if (flags && !(ecx || xmm0)) - emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL, - operands[2], operands[3], - operands[4])); - if (!(flags || ecx || xmm0)) - emit_note (NOTE_INSN_DELETED); - - DONE; -} - [(set_attr "type" "sselog") - (set_attr "prefix_data16" "1") - (set_attr "prefix_extra" "1") - (set_attr "ssememalign" "8") - (set_attr "length_immediate" "1") - (set_attr "memory" "load") - (set_attr "mode" "TI")]) - (define_insn "sse4_2_pcmpistri" [(set (match_operand:SI 0 "register_operand" "=c,c") (unspec:SI diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 90391a5..f97e63e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,31 @@ +2016-04-19 H.J. Lu + + PR target/69201 + * gcc.target/i386/avx256-unaligned-store-1.c (a): Make it + extern to force it misaligned. + (b): Likewise. + (c): Likewise. + (d): Likewise. + Check vmovups.*movv8sf_internal/3 instead of avx_storeups256. + Don't check `*' before movv4sf_internal. + * gcc.target/i386/avx256-unaligned-store-2.c: Check + vmovups.*movv32qi_internal/3 instead of avx_storeups256. + Don't check `*' before movv16qi_internal. + * gcc.target/i386/avx256-unaligned-store-3.c (a): Make it + extern to force it misaligned. + (b): Likewise. + (c): Likewise. + (d): Likewise. 
+	Check vmovups.*movv4df_internal/3 instead of avx_storeupd256.
+	Don't check `*' before movv2df_internal.
+	* gcc.target/i386/avx256-unaligned-store-4.c (a): Make it
+	extern to force it misaligned.
+	(b): Likewise.
+	(c): Likewise.
+	(d): Likewise.
+	Check movv8sf_internal instead of avx_storeups256.
+	Check movups.*movv4sf_internal/3 instead of avx_storeups256.
+
 2016-04-19  Richard Biener
 
 	PR tree-optimization/70171
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
index 5e8c30d..d82aecf 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
@@ -3,7 +3,7 @@
 
 #define N 1024
 
-float a[N], b[N+3], c[N], d[N];
+extern float a[N], b[N+3], c[N], d[N];
 
 void
 avx_test (void)
@@ -17,6 +17,6 @@ avx_test (void)
     d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "avx_storeups256" } } */
-/* { dg-final { scan-assembler "vmovups.*\\*movv4sf_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovups.*movv8sf_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
index eeabfe9..817be17 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
@@ -23,6 +23,6 @@ avx_test (void)
     }
 }
 
-/* { dg-final { scan-assembler-not "avx_storedqu256" } } */
-/* { dg-final { scan-assembler "vmovups.*\\*movv16qi_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovups.*movv32qi_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv16qi_internal/3" } } */
 /* { dg-final { scan-assembler "vextract.128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
index 6175d52..a439a66 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
@@ -3,7 +3,7 @@
 
 #define N 1024
 
-double a[N], b[N+3], c[N], d[N];
+extern double a[N], b[N+3], c[N], d[N];
 
 void
 avx_test (void)
@@ -17,6 +17,6 @@ avx_test (void)
     d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "avx_storeupd256" } } */
-/* { dg-final { scan-assembler "vmovups.*\\*movv2df_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovups.*movv4df_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv2df_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
index 68ff923..463c1d8 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
@@ -3,7 +3,7 @@
 
 #define N 1024
 
-float a[N], b[N+3], c[N];
+extern float a[N], b[N+3], c[N];
 
 void
 avx_test (void)
@@ -14,7 +14,6 @@ avx_test (void)
     b[i+3] = a[i] * c[i];
 }
 
-/* { dg-final { scan-assembler "avx_storeups256" } } */
-/* { dg-final { scan-assembler-not "sse_storeups" } } */
-/* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv8sf_internal/3" } } */
+/* { dg-final { scan-assembler-not "movups.*movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler-not "vextractf128" } } */
-- 
2.7.4