From 125886c749998b8f2bd8a49de7350755e92db718 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 4 May 2005 00:16:01 +0200 Subject: [PATCH] re PR rtl-optimization/21239 (Illegal elimination of SSE2 load/store using xmm intrinsics) * config/i386/i386.c (ix86_expand_vector_set): Fix setting 3rd and 4th item in V4SF mode. PR rtl-optimization/21239 * combine.c (combine_simplify_rtx) <case VEC_SELECT>: Fix a typo. * gcc.dg/i386-sse-11.c: New test. From-SVN: r99186 --- gcc/ChangeLog | 8 ++++ gcc/combine.c | 2 +- gcc/config/i386/i386.c | 21 +++++---- gcc/testsuite/ChangeLog | 5 +++ gcc/testsuite/gcc.dg/i386-sse-11.c | 92 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 118 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/i386-sse-11.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b565816..fa84ec7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2005-05-04 Jakub Jelinek + + * config/i386/i386.c (ix86_expand_vector_set): Fix setting 3rd and 4th + item in V4SF mode. + + PR rtl-optimization/21239 + * combine.c (combine_simplify_rtx) <case VEC_SELECT>: Fix a typo. + 2005-05-03 Kazu Hirata * tree-flow.h (tree_ann_common_d): Move aux to ... 
diff --git a/gcc/combine.c b/gcc/combine.c index d9e0b4f..251c1ee 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -4742,7 +4742,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest) if (GET_CODE (op0) == VEC_CONCAT) { HOST_WIDE_INT op0_size = GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))); - if (op0_size < offset) + if (offset < op0_size) op0 = XEXP (op0, 0); else { diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ea10926..49d22fd 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -17022,32 +17022,35 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) break; case 1: - /* tmp = op0 = A B C D */ + /* tmp = target = A B C D */ tmp = copy_to_reg (target); - - /* op0 = C C D D */ + /* target = A A B B */ emit_insn (gen_sse_unpcklps (target, target, target)); - - /* op0 = C C D X */ + /* target = X A B B */ ix86_expand_vector_set (false, target, val, 0); - - /* op0 = A B X D */ + /* target = A X C D */ emit_insn (gen_sse_shufps_1 (target, target, tmp, GEN_INT (1), GEN_INT (0), GEN_INT (2+4), GEN_INT (3+4))); return; case 2: + /* tmp = target = A B C D */ tmp = copy_to_reg (target); - ix86_expand_vector_set (false, target, val, 0); + /* tmp = X B C D */ + ix86_expand_vector_set (false, tmp, val, 0); + /* target = A B X D */ emit_insn (gen_sse_shufps_1 (target, target, tmp, GEN_INT (0), GEN_INT (1), GEN_INT (0+4), GEN_INT (3+4))); return; case 3: + /* tmp = target = A B C D */ tmp = copy_to_reg (target); - ix86_expand_vector_set (false, target, val, 0); + /* tmp = X B C D */ + ix86_expand_vector_set (false, tmp, val, 0); + /* target = A B C X */ emit_insn (gen_sse_shufps_1 (target, target, tmp, GEN_INT (0), GEN_INT (1), GEN_INT (2+4), GEN_INT (0+4))); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 53428bf..348b911 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2005-05-04 Jakub Jelinek + + PR rtl-optimization/21239 + * gcc.dg/i386-sse-11.c: New 
test. + 2005-05-03 Alexandre Oliva PR target/16888 diff --git a/gcc/testsuite/gcc.dg/i386-sse-11.c b/gcc/testsuite/gcc.dg/i386-sse-11.c new file mode 100644 index 0000000..fd72047 --- /dev/null +++ b/gcc/testsuite/gcc.dg/i386-sse-11.c @@ -0,0 +1,92 @@ +/* PR rtl-optimization/21239 */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -msse2" } */ +#include <emmintrin.h> +#include "i386-cpuid.h" + +extern void abort (void); + +void +foo (unsigned int x, double *y, const double *z) +{ + __m128d tmp; + while (x) + { + tmp = _mm_load_sd (z); + _mm_store_sd (y, tmp); + --x; ++z; ++y; + } +} + +void +bar (unsigned int x, float *y, const float *z) +{ + __m128 tmp; + unsigned int i; + for (i = 0; i < x; ++i) + { + tmp = (__m128) { *z, 0, 0, 0 }; + *y = __builtin_ia32_vec_ext_v4sf (tmp, 0); + ++z; ++y; + } + for (i = 0; i < x; ++i) + { + tmp = (__m128) { 0, *z, 0, 0 }; + *y = __builtin_ia32_vec_ext_v4sf (tmp, 1); + ++z; ++y; + } + for (i = 0; i < x; ++i) + { + tmp = (__m128) { 0, 0, *z, 0 }; + *y = __builtin_ia32_vec_ext_v4sf (tmp, 2); + ++z; ++y; + } + for (i = 0; i < x; ++i) + { + tmp = (__m128) { 0, 0, 0, *z }; + *y = __builtin_ia32_vec_ext_v4sf (tmp, 3); + ++z; ++y; + } +} + +void __attribute__((noinline)) +run_tests (void) +{ + unsigned int i; + double a[16], b[16]; + float c[16], d[16]; + for (i = 0; i < 16; ++i) + { + a[i] = 1; + b[i] = 2; + c[i] = 3; + d[i] = 4; + } + foo (16, a, b); + bar (4, c, d); + for (i = 0; i < 16; ++i) + { + if (a[i] != 2) + abort (); + if (c[i] != 4) + abort (); + } +} + +int +main () +{ + unsigned long cpu_facilities; + unsigned int i; + double a[19], b[19]; + + cpu_facilities = i386_cpuid (); + + if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV)) + != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV)) + /* If host has no vector support, pass. */ + return 0; + + run_tests (); + return 0; +} -- 2.7.4