From 65b82caa2b4417c19b28e533298dbf61d4ba230b Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 7 Jan 2009 14:56:14 +0000 Subject: [PATCH] AVX Programming Reference (December, 2008) gcc/ 2009-01-07 H.J. Lu AVX Programming Reference (December, 2008) * config/i386/avxintrin.h (_mm256_stream_si256): New. (_mm256_stream_pd): Likewise. (_mm256_stream_ps): Likewise. * config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVNTDQ256, IX86_BUILTIN_MOVNTPD256 and IX86_BUILTIN_MOVNTPS256. (ix86_special_builtin_type): Add VOID_FTYPE_PV4DI_V4DI. (bdesc_special_args): Add __builtin_ia32_movntdq256, __builtin_ia32_movntpd256 and __builtin_ia32_movntps256. (ix86_init_mmx_sse_builtins): Handle VOID_FTYPE_PV4DI_V4DI. (ix86_expand_special_args_builtin): Likewise. * config/i386/sse.md (AVXMODEDI): New. (avx_movnt): Likewise. (avx_movnt): Likewise. (_movnt): Remove AVX support. (sse2_movntv2di): Likewise. gcc/testsuite/ 2009-01-07 H.J. Lu AVX Programming Reference (December, 2008) * gcc.target/i386/avx-vmovntdq-256-1.c: New. * gcc.target/i386/avx-vmovntpd-256-1.c: Likewise. * gcc.target/i386/avx-vmovntps-256-1.c: Likewise. * gcc.target/i386/sse2-movntdq-1.c (TEST): Align array to 16byte. * gcc.target/i386/sse2-movntpd-1.c (TEST): Likewise. From-SVN: r143157 --- gcc/ChangeLog | 21 +++++++++++++++ gcc/config/i386/avxintrin.h | 18 +++++++++++++ gcc/config/i386/i386.c | 17 ++++++++++++ gcc/config/i386/sse.md | 31 +++++++++++++++++++--- gcc/testsuite/ChangeLog | 10 +++++++ gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c | 27 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c | 25 +++++++++++++++++ gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c | 26 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c | 2 +- gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c | 2 +- 10 files changed, 173 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 81804116..96013c1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2009-01-07 H.J. Lu + + AVX Programming Reference (December, 2008) + * config/i386/avxintrin.h (_mm256_stream_si256): New. + (_mm256_stream_pd): Likewise. + (_mm256_stream_ps): Likewise. + + * config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVNTDQ256, + IX86_BUILTIN_MOVNTPD256 and IX86_BUILTIN_MOVNTPS256. + (ix86_special_builtin_type): Add VOID_FTYPE_PV4DI_V4DI. + (bdesc_special_args): Add __builtin_ia32_movntdq256, + __builtin_ia32_movntpd256 and __builtin_ia32_movntps256. + (ix86_init_mmx_sse_builtins): Handle VOID_FTYPE_PV4DI_V4DI. + (ix86_expand_special_args_builtin): Likewise. + + * config/i386/sse.md (AVXMODEDI): New. + (avx_movnt): Likewise. + (avx_movnt): Likewise. + (_movnt): Remove AVX support. + (sse2_movntv2di): Likewise. + 2009-01-07 Richard Guenther PR middle-end/38751 diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index 5780910..2529c75 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -968,6 +968,24 @@ _mm256_lddqu_si256 (__m256i const *__P) return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P); } +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_stream_si256 (__m256i *__A, __m256i __B) +{ + __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_stream_pd (double *__A, __m256d __B) +{ + __builtin_ia32_movntpd256 (__A, (__v4df)__B); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_stream_ps (float *__P, __m256 __A) +{ + __builtin_ia32_movntps256 (__P, (__v8sf)__A); +} + extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_rcp_ps (__m256 __A) { diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c4d0449..df2be1c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19942,6 +19942,9 @@ enum ix86_builtins IX86_BUILTIN_STOREUPD256, IX86_BUILTIN_STOREUPS256, IX86_BUILTIN_LDDQU256, + IX86_BUILTIN_MOVNTDQ256, + IX86_BUILTIN_MOVNTPD256, + IX86_BUILTIN_MOVNTPS256, IX86_BUILTIN_LOADDQU256, IX86_BUILTIN_STOREDQU256, IX86_BUILTIN_MASKLOADPD, @@ -20413,6 +20416,7 @@ enum ix86_special_builtin_type V2DF_FTYPE_PCV2DF_V2DF, V2DI_FTYPE_PV2DI, VOID_FTYPE_PV2SF_V4SF, + VOID_FTYPE_PV4DI_V4DI, VOID_FTYPE_PV2DI_V2DI, VOID_FTYPE_PCHAR_V32QI, VOID_FTYPE_PCHAR_V16QI, @@ -20652,6 +20656,10 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF }, @@ -22183,6 +22191,11 @@ ix86_init_mmx_sse_builtins (void) V8SI_type_node, V4SI_type_node, integer_type_node, NULL_TREE); + tree pv4di_type_node = build_pointer_type (V4DI_type_node); + tree void_ftype_pv4di_v4di + = build_function_type_list (void_type_node, + pv4di_type_node, V4DI_type_node, + NULL_TREE); tree v8sf_ftype_v8sf_v4sf_int = build_function_type_list (V8SF_type_node, V8SF_type_node, V4SF_type_node, @@ -22369,6 +22382,9 @@ ix86_init_mmx_sse_builtins (void) case VOID_FTYPE_PV2SF_V4SF: type = void_ftype_pv2sf_v4sf; break; + case VOID_FTYPE_PV4DI_V4DI: + type = void_ftype_pv4di_v4di; + break; case VOID_FTYPE_PV2DI_V2DI: type = void_ftype_pv2di_v2di; break; @@ -24215,6 +24231,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, memory = 0; break; case VOID_FTYPE_PV2SF_V4SF: + case VOID_FTYPE_PV4DI_V4DI: case VOID_FTYPE_PV2DI_V2DI: case VOID_FTYPE_PCHAR_V32QI: case VOID_FTYPE_PCHAR_V16QI: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e9f8639..393a316 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -35,6 +35,9 @@ ;; All QI vector modes handled by AVX (define_mode_iterator AVXMODEQI [V32QI V16QI]) +;; All DI vector modes handled by AVX +(define_mode_iterator AVXMODEDI [V4DI V2DI]) + ;; All vector modes handled by AVX (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF]) @@ -383,26 +386,46 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) +(define_insn "avx_movnt" + [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") + (unspec:AVXMODEF2P + [(match_operand:AVXMODEF2P 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "AVX_VEC_FLOAT_MODE_P (mode)" + "vmovntp\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + (define_insn "_movnt" [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m") (unspec:SSEMODEF2P [(match_operand:SSEMODEF2P 1 "register_operand" "x")] UNSPEC_MOVNT))] "SSE_VEC_FLOAT_MODE_P (mode)" - "%vmovntp\t{%1, %0|%0, %1}" + "movntp\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") - (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) +(define_insn "avx_movnt" + [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m") + (unspec:AVXMODEDI + [(match_operand:AVXMODEDI 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_AVX" + "vmovntdq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + (define_insn "sse2_movntv2di" [(set (match_operand:V2DI 0 "memory_operand" "=m") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] UNSPEC_MOVNT))] "TARGET_SSE2" - "%vmovntdq\t{%1, %0|%0, %1}" + "movntdq\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix_data16" "1") - (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) (define_insn "sse2_movntsi" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index dd39cc1..125ae61 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2009-01-07 H.J. Lu + + AVX Programming Reference (December, 2008) + * gcc.target/i386/avx-vmovntdq-256-1.c: New. + * gcc.target/i386/avx-vmovntpd-256-1.c: Likewise. + * gcc.target/i386/avx-vmovntps-256-1.c: Likewise. + + * gcc.target/i386/sse2-movntdq-1.c (TEST): Align array to 16byte. + * gcc.target/i386/sse2-movntpd-1.c (TEST): Likewise. + 2009-01-06 Thomas Koenig PR fortran/38220 diff --git a/gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c b/gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c new file mode 100644 index 0000000..5caf34e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +static void +__attribute__((noinline)) +test (__m256i *p, __m256i s) +{ + return _mm256_stream_si256 (p, s); +} + +static void +avx_test (void) +{ + union256i_d u; + int e[8] __attribute__ ((aligned(32))) = {1,1,1,1,1,1,1,1}; + + u.x = _mm256_set_epi32 (2434, 6845, 3789, 4683, + 4623, 2236, 8295, 1084); + + test ((__m256i *)e, u.x); + + if (check_union256i_d (u, e)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c b/gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c new file mode 100644 index 0000000..f17deaf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +static void +__attribute__((noinline)) +test (double *p, __m256d s) +{ + return _mm256_stream_pd (p, s); +} + +static void +avx_test (void) +{ + union256d u; + double e[4] __attribute__ ((aligned(32))) = {1,1,1,1}; + + u.x = _mm256_set_pd (2134.3343, 1234.635654, -13443.35, 43.35345); + test (e, u.x); + + if (check_union256d (u, e)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c b/gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c new file mode 100644 index 0000000..9f79403 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +static void +__attribute__((noinline)) +test (float *p, __m256 s) +{ + return _mm256_stream_ps (p, s); +} + +static void +avx_test (void) +{ + union256 u; + float e[8] __attribute__ ((aligned(32))); + + u.x = _mm256_set_ps (24.43, 68.346, -43.35, 546.46, + 46.9, -2.78, 82.9, -0.4); + test (e, u.x); + + if (check_union256 (u, e)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c b/gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c index b219e29..d36eaef 100644 --- a/gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c +++ b/gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c @@ -24,7 +24,7 @@ static void TEST (void) { union128i_d u; - int e[4] __attribute__ ((aligned(32))); + int e[4] __attribute__ ((aligned(16))); u.x = _mm_set_epi32 (21, 34, 334, 8567); diff --git a/gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c b/gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c index 7f5274e..edcb90c 100644 --- a/gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c +++ b/gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c @@ -24,7 +24,7 @@ static void TEST (void) { union128d u; - double e[2] __attribute__ ((aligned(32))); + double e[2] __attribute__ ((aligned(16))); u.x = _mm_set_pd (2134.3343,1234.635654); test (e, u.x); -- 2.7.4