From 4c98bdadcb67e12c63882587d96ab6335711f009 Mon Sep 17 00:00:00 2001 From: Sunil K Pandey Date: Thu, 11 Jul 2019 19:09:37 +0000 Subject: [PATCH] i386: Add AVX512 unaligned intrinsics __m512i _mm512_loadu_epi64( void * sa); void _mm512_storeu_epi64(void * d, __m512i a); __m512i _mm512_loadu_epi32( void * sa); void _mm512_storeu_epi32(void * d, __m512i a); void _mm256_storeu_epi64(void * d, __m256i a); void _mm_storeu_epi64(void * d, __m128i a); void _mm256_storeu_epi32(void * d, __m256i a); void _mm_storeu_epi32(void * d, __m128i a); Tested on x86-64. 2019-07-11 Sunil K Pandey gcc/ PR target/90980 * config/i386/avx512fintrin.h (_mm512_loadu_epi64): New. (_mm512_storeu_epi64): Likewise. (_mm512_loadu_epi32): Likewise. (_mm512_storeu_epi32): Likewise. * config/i386/avx512vlintrin.h (_mm256_storeu_epi64): New. (_mm_storeu_epi64): Likewise. (_mm256_storeu_epi32): Likewise. (_mm_storeu_epi32): Likewise. gcc/testsuite/ PR target/90980 * gcc.target/i386/pr90980-1.c: New test. * gcc.target/i386/pr90980-2.c: Likewise. * gcc.target/i386/pr90980-3.c: Likewise. From-SVN: r273416 --- gcc/ChangeLog | 12 ++++++++++++ gcc/config/i386/avx512fintrin.h | 28 ++++++++++++++++++++++++++++ gcc/config/i386/avx512vlintrin.h | 28 ++++++++++++++++++++++++++++ gcc/testsuite/ChangeLog | 7 +++++++ gcc/testsuite/gcc.target/i386/pr90980-1.c | 17 +++++++++++++++++ gcc/testsuite/gcc.target/i386/pr90980-2.c | 17 +++++++++++++++++ gcc/testsuite/gcc.target/i386/pr90980-3.c | 20 ++++++++++++++++++++ 7 files changed, 129 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90980-3.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index aa9e05c..235cc53 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2019-07-11 Sunil K Pandey + + PR target/90980 + * config/i386/avx512fintrin.h (_mm512_loadu_epi64): New. + (_mm512_storeu_epi64): Likewise. + (_mm512_loadu_epi32): Likewise. + (_mm512_storeu_epi32): Likewise. + * config/i386/avx512vlintrin.h (_mm256_storeu_epi64): New. + (_mm_storeu_epi64): Likewise. + (_mm256_storeu_epi32): Likewise. + (_mm_storeu_epi32): Likewise. + 2019-07-11 Segher Boessenkool * config/rs6000/rs6000-logue.c: Add Modula-2 to comment. diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index e35eedb..454fd3d 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -6352,6 +6352,13 @@ _mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_epi64 (void const *__P) +{ + return *(__m512i_u *) __P; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, @@ -6371,6 +6378,13 @@ _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_epi64 (void *__P, __m512i __A) +{ + *(__m512i_u *) __P = (__m512i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A, @@ -6386,6 +6400,13 @@ _mm512_loadu_si512 (void const *__P) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_epi32 (void const *__P) +{ + return *(__m512i_u *) __P; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, @@ -6412,6 +6433,13 @@ _mm512_storeu_si512 (void *__P, __m512i __A) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_epi32 (void *__P, __m512i __A) +{ + *(__m512i_u *) __P = (__m512i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A, diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 3eaf817..bd8746d 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -757,6 +757,13 @@ _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_epi64 (void *__P, __m256i __A) +{ + *(__m256i_u *) __P = (__m256i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqudi256_mask ((long long *) __P, @@ -766,6 +773,13 @@ _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_epi64 (void *__P, __m128i __A) +{ + *(__m128i_u *) __P = (__m128i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqudi128_mask ((long long *) __P, @@ -813,6 +827,13 @@ _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_epi32 (void *__P, __m256i __A) +{ + *(__m256i_u *) __P = (__m256i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqusi256_mask ((int *) __P, @@ -822,6 +843,13 @@ _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_epi32 (void *__P, __m128i __A) +{ + *(__m128i_u *) __P = (__m128i_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqusi128_mask ((int *) __P, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f4a6d05..cfc96b0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2019-07-11 Sunil K Pandey + + PR target/90980 + * gcc.target/i386/pr90980-1.c: New test. + * gcc.target/i386/pr90980-2.c: Likewise. + * gcc.target/i386/pr90980-3.c: Likewise. + 2019-07-11 Yannick Moy * gnat.dg/loop_entry1.adb: New testcase. diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c new file mode 100644 index 0000000..72a30dc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include + +int *a; +long long *b; +volatile __m128i xx; +volatile __m128i xx1; + +void extern +avx512vl_test (void) +{ + _mm_storeu_epi32 (a, xx); + _mm_storeu_epi64 (b, xx1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr90980-2.c b/gcc/testsuite/gcc.target/i386/pr90980-2.c new file mode 100644 index 0000000..b1980e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqu\[0-9\]*\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include + +int *a; +long long *b; +volatile __m256i yy; +volatile __m256i yy1; + +void extern +avx512vl_test (void) +{ + _mm256_storeu_epi32 (a, yy); + _mm256_storeu_epi64 (b, yy1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr90980-3.c b/gcc/testsuite/gcc.target/i386/pr90980-3.c new file mode 100644 index 0000000..d839ee0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90980-3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ + +#include + +int *a; +long long *b; +volatile __m512i zz; +volatile __m512i zz1; + +void extern +avx512f_test (void) +{ + zz = _mm512_loadu_epi32 (a); + _mm512_storeu_epi32 (a, zz); + zz1 = _mm512_loadu_epi64 (b); + _mm512_storeu_epi64 (b, zz1); +} -- 2.7.4