+2005-05-04 Jakub Jelinek <jakub@redhat.com>
+
+ * config/i386/i386.c (ix86_expand_vector_set): Fix setting 3rd and 4th
+ item in V4SF mode.
+
+ PR rtl-optimization/21239
+ * combine.c (combine_simplify_rtx) <case VEC_SELECT>: Fix a typo.
+
2005-05-03 Kazu Hirata <kazu@cs.umass.edu>
* tree-flow.h (tree_ann_common_d): Move aux to ...
if (GET_CODE (op0) == VEC_CONCAT)
{
HOST_WIDE_INT op0_size = GET_MODE_SIZE (GET_MODE (XEXP (op0, 0)));
- if (op0_size < offset)
+ if (offset < op0_size)
op0 = XEXP (op0, 0);
else
{
break;
case 1:
- /* tmp = op0 = A B C D */
+ /* tmp = target = A B C D */
tmp = copy_to_reg (target);
-
- /* op0 = C C D D */
+ /* target = A A B B */
emit_insn (gen_sse_unpcklps (target, target, target));
-
- /* op0 = C C D X */
+ /* target = X A B B */
ix86_expand_vector_set (false, target, val, 0);
-
- /* op0 = A B X D */
+ /* target = A X C D */
emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (1), GEN_INT (0),
GEN_INT (2+4), GEN_INT (3+4)));
return;
case 2:
+ /* tmp = target = A B C D */
tmp = copy_to_reg (target);
- ix86_expand_vector_set (false, target, val, 0);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B X D */
emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (0+4), GEN_INT (3+4)));
return;
case 3:
+ /* tmp = target = A B C D */
tmp = copy_to_reg (target);
- ix86_expand_vector_set (false, target, val, 0);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B C X */
emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (2+4), GEN_INT (0+4)));
+2005-05-04 Jakub Jelinek <jakub@redhat.com>
+
+ PR rtl-optimization/21239
+ * gcc.dg/i386-sse-11.c: New test.
+
2005-05-03 Alexandre Oliva <aoliva@redhat.com>
PR target/16888
--- /dev/null
+/* PR rtl-optimization/21239 */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse2" } */
+#include <emmintrin.h>
+#include "i386-cpuid.h"
+
+extern void abort (void);
+
+void /* Copy x doubles from z to y, one element per loop iteration.  Each
+         element is read with _mm_load_sd and written with _mm_store_sd.  */
+foo (unsigned int x, double *y, const double *z)
+{
+ __m128d tmp;
+ while (x)
+ {
+ tmp = _mm_load_sd (z);
+ _mm_store_sd (y, tmp);
+ --x; ++z; ++y; /* One element done: count down and advance both pointers.  */
+ }
+}
+
+void /* Copy 4 * x floats from z to y in four passes of x elements each.
+         Pass k (k = 0..3) builds a four-element vector with *z in element k
+         and zeros elsewhere, passes it together with k to
+         __builtin_ia32_vec_ext_v4sf, and stores the result to *y.  z and y
+         are never reset between passes, so the passes cover consecutive
+         ranges of the two arrays.  */
+bar (unsigned int x, float *y, const float *z)
+{
+ __m128 tmp;
+ unsigned int i;
+ for (i = 0; i < x; ++i) /* Pass 0: value placed in element 0.  */
+ {
+ tmp = (__m128) { *z, 0, 0, 0 };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 0);
+ ++z; ++y;
+ }
+ for (i = 0; i < x; ++i) /* Pass 1: value placed in element 1.  */
+ {
+ tmp = (__m128) { 0, *z, 0, 0 };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 1);
+ ++z; ++y;
+ }
+ for (i = 0; i < x; ++i) /* Pass 2: value placed in element 2.  */
+ {
+ tmp = (__m128) { 0, 0, *z, 0 };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 2);
+ ++z; ++y;
+ }
+ for (i = 0; i < x; ++i) /* Pass 3: value placed in element 3.  */
+ {
+ tmp = (__m128) { 0, 0, 0, *z };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 3);
+ ++z; ++y;
+ }
+}
+
+void __attribute__((noinline)) /* Fill four 16-element arrays with distinct
+         constants, copy b into a with foo and d into c with bar, then abort
+         unless every destination element holds the source constant.  */
+run_tests (void)
+{
+ unsigned int i;
+ double a[16], b[16];
+ float c[16], d[16];
+ for (i = 0; i < 16; ++i)
+ {
+ a[i] = 1; /* Destination for foo; must be overwritten with 2.  */
+ b[i] = 2; /* Source for foo.  */
+ c[i] = 3; /* Destination for bar; must be overwritten with 4.  */
+ d[i] = 4; /* Source for bar.  */
+ }
+ foo (16, a, b);
+ bar (4, c, d); /* Four passes of 4 elements each cover all 16 entries.  */
+ for (i = 0; i < 16; ++i)
+ {
+ if (a[i] != 2)
+ abort ();
+ if (c[i] != 4)
+ abort ();
+ }
+}
+
+/* Test entry point.  Runs run_tests only when i386_cpuid reports all of
+   MMX, SSE, SSE2 and CMOV; otherwise returns 0 without running them, so
+   the test passes on hosts that lack those features.  Always returns 0:
+   failures are signalled by abort () inside run_tests.  */
+int
+main (void)
+{
+ unsigned long cpu_facilities;
+
+ cpu_facilities = i386_cpuid ();
+
+ /* Every required feature bit must be set, hence the compare against the
+    full mask rather than a simple non-zero test.  */
+ if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ /* If host has no vector support, pass. */
+ return 0;
+
+ run_tests ();
+ return 0;
+}