* config/i386/sse.md (*vec_concatv2sf_sse4_1): New insn pattern.

author uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>

Thu, 15 May 2008 10:17:45 +0000 (10:17 +0000)

committer uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>

Thu, 15 May 2008 10:17:45 +0000 (10:17 +0000)
author uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 15 May 2008 10:17:45 +0000 (10:17 +0000)
committer uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 15 May 2008 10:17:45 +0000 (10:17 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 74c5765..a76ec9d 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2008-05-15  Uros Bizjak  <ubizjak@gmail.com>
+           H.J. Lu  <hongjiu.lu@intel.com>
+
+       * config/i386/sse.md (*vec_concatv2sf_sse4_1): New insn pattern.
+       (*vec_concatv2si_sse4_1): Use vector_move_operand predicate
+       for operand 2.  Remove pinsr{q,d} with 0x0 immediate operand from
+       insn alternatives.  Add missing alternatives.
+       (*vec_concatv2di_rex64_sse4_1): Likewise.
+       (*vec_concatv2si_sse2): Use "x" register constraint instead of "Y2".
+       (*vec_concatv2di_rex64_sse): Rename from *vec_concatv2di_rex64.
+       Require TARGET_SSE.
+
  2008-05-15  Richard Guenther  <rguenther@suse.de>
  
         PR tree-optimization/36009
@@ -71,8 +83,7 @@
         (vrotl@var{m}3): Ditto.
         (vrotr@var{m}3): Ditto.
  
-       * config/i386/i386.md (PPERM_SRC): Move PPERM masks here from
-       i386.c.
+       * config/i386/i386.md (PPERM_SRC): Move PPERM masks here from i386.c.
         (PPERM_INVERT): Ditto.
         (PPERM_REVERSE): Ditto.
         (PPERM_REV_INV): Ditto.
@@ -122,8 +133,7 @@
         * config/rs6000/rs6000.c (bdesc_2arg): Change the names of vector
         shift patterns.
  
-       * config/rs6000/altivec.md (vashl<mode>3): Rename from
-       ashl<mode>3.
+       * config/rs6000/altivec.md (vashl<mode>3): Rename from ashl<mode>3.
         (vlshr<mode>3): Rename from vlshr<mode>3.
         (vashr<mode>3): Rename from vashr<mode>3.
         (mulv4sf3): Change the names of vector shift patterns.
@@ -133,8 +143,7 @@
         * config/spu/spu.c (spu_initialize_trampoline): Rename vector
         shift insns.
  
-       * config/spu/spu-builtins.def (SI_SHLH): Rename vector shift
-       insns.
+       * config/spu/spu-builtins.def (SI_SHLH): Rename vector shift insns.
         (SI_SHLHI): Ditto.
         (SI_SHL): Ditto.
         (SI_SHLI): Ditto.
@@ -176,9 +185,8 @@
  2008-05-14  Michael Meissner  <michael.meissner@amd.com>
  
         PR target/36224
-       * config/i386/sse.md (vec_widen_smult_hi_v4si): Delete, using
-       unsigned multiply gives the wrong value when doing widening
-       multiplies.
+       * config/i386/sse.md (vec_widen_smult_hi_v4si): Delete, using unsigned
+       multiply gives the wrong value when doing widening multiplies.
         (vec_widen_smult_lo_v4si): Ditto.
  
  2008-05-14  Kenneth Zadeck <zadeck@naturalbridge.com>
@@ -207,8 +215,7 @@
  2008-05-14  Adam Nemet  <anemet@caviumnetworks.com>
  
         * calls.c (emit_library_call_value_1): Restore code clearing
-       ECF_LIBCALL_BLOCK to ensure that we only call end_sequence
-       once.
+       ECF_LIBCALL_BLOCK to ensure that we only call end_sequence once.
  
  2008-05-14  Olivier Hainque  <hainque@adacore.com>
             Nicolas Roche  <roche@adacore.com>
@@ -234,7 +241,7 @@
         (emit_no_conflict_block): Removed.
         * optabls.h: (emit_no_conflict_block): Removed.
         * cse.c (cse_extended_basic_block): Remove search for
-       REG_NO_CONFLICT note.  
+       REG_NO_CONFLICT note.
         * global.c: Removed incorrect comment added in revision 117.
         * expr.c (convert_move): Change call to emit_no_conflict_block to
         emit_insn.
@@ -253,20 +260,19 @@
         REG_NO_CONFLICT notes.
         * loop_invariant.c (find_invariant_insn): Removed REG_NO_CONFLICT
         case.
-       * combine.c (can_combine_p, distribute_notes):  Removed REG_NO_CONFLICT
-       case.
-       * config/cris/cris.md (movdi pattern): Changed
-       emit_no_conflict_block to emit_insns.
+       * combine.c (can_combine_p, distribute_notes):  Removed
+       REG_NO_CONFLICT case.
+       * config/cris/cris.md (movdi pattern): Changed emit_no_conflict_block
+       to emit_insns.
         * config/mn10300/mn10300.md (absdf2, negdf2 patterns): Ditto.
         * config/m68k/m68k.md (negdf2, negxf2, absdf2, absxf2 patterns):
-       Ditto. 
+       Ditto.
         * reg-notes.def (NO_CONFLICT): Removed.
  
  2008-05-14  David S. Miller  <davem@davemloft.net>
  
         * config/sparc/sparc.c (sparc_profile_hook): If
-       NO_PROFILE_COUNTERS, don't generate and pass a label
-       into mcount.
+       NO_PROFILE_COUNTERS, don't generate and pass a label into mcount.
         * config/sparc/linux.h (NO_PROFILE_COUNTERS): Define as 1.
         * config/sparc/linux64.h (NO_PROFILE_COUNTERS): Likewise.
  
@@ -304,8 +310,8 @@
         tree-ssanames.c.  Convert to static inline.  Call make_ssa_name_fn.
         * omp-low.c (expand_omp_parallel):
         * tree-flow-inline.h (redirect_edge_var_map_result):
-       * tree-ssa.c (init_tree_ssa): Add argument FN.  Use it instead of cfun.
-       Update all users.
+       * tree-ssa.c (init_tree_ssa): Add argument FN.
+       Use it instead of cfun.  Update all users.
  
  2008-05-13  Tom Tromey  <tromey@redhat.com>
  
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

index 23a63d8..6e781c1 100644 (file)
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2257,6 +2257,24 @@
    [(set_attr "type" "sselog1")
     (set_attr "mode" "V4SF")])
  
+;; Although insertps takes register source, we prefer unpcklps there since
+;; it is shorter; only insertps (alternative 1) has the extra opcode byte.
+(define_insn "*vec_concatv2sf_sse4_1"
+  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,x,*y,*y")
+       (vec_concat:V2SF
+         (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0, m")
+         (match_operand:SF 2 "vector_move_operand"  " x,m,C,*y, C")))]
+  "TARGET_SSE4_1"
+  "@
+   unpcklps\t{%2, %0|%0, %2}
+   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
+   movss\t{%1, %0|%0, %1}
+   punpckldq\t{%2, %0|%0, %2}
+   movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+   (set_attr "prefix_extra" "*,1,*,*,*")
+   (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
+
  ;; ??? In theory we can match memory for the MMX alternative, but allowing
  ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
  ;; alternatives pretty much forces the MMX alternative to be chosen.
@@ -4801,25 +4819,29 @@
     (set_attr "mode" "TI,V4SF")])
  
  (define_insn "*vec_concatv2si_sse4_1"
-  [(set (match_operand:V2SI 0 "register_operand" "=x,x")
+  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,x ,*y ,*y")
         (vec_concat:V2SI
-         (match_operand:SI 1 "nonimmediate_operand" "0,rm")
-         (match_operand:SI 2 "nonimmediate_operand" "rm,0")))]
+         (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
+         (match_operand:SI 2 "vector_move_operand"  "rm,x,C ,*ym,C")))]
    "TARGET_SSE4_1"
    "@
-  pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
-  pinsrd\t{$0x0, %2, %0|%0, %2, 0x0}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
+   pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
+   punpckldq\t{%2, %0|%0, %2}
+   movd\t{%1, %0|%0, %1}
+   punpckldq\t{%2, %0|%0, %2}
+   movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+   (set_attr "prefix_extra" "1,*,*,*,*")
+   (set_attr "mode" "TI,TI,TI,DI,DI")])
  
  ;; ??? In theory we can match memory for the MMX alternative, but allowing
  ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
  ;; alternatives pretty much forces the MMX alternative to be chosen.
  (define_insn "*vec_concatv2si_sse2"
-  [(set (match_operand:V2SI 0 "register_operand"     "=Y2, Y2,*y,*y")
+  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,*y")
         (vec_concat:V2SI
-         (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
-         (match_operand:SI 2 "reg_or_0_operand"     " Y2,C  ,*y, C")))]
+         (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
+         (match_operand:SI 2 "reg_or_0_operand"     " x,C ,*y, C")))]
    "TARGET_SSE2"
    "@
     punpckldq\t{%2, %0|%0, %2}
@@ -4856,18 +4878,6 @@
    [(set_attr "type" "sselog,ssemov,ssemov")
     (set_attr "mode" "TI,V4SF,V2SF")])
  
-(define_insn "*vec_concatv2di_rex64_sse4_1"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
-       (vec_concat:V2DI
-         (match_operand:DI 1 "nonimmediate_operand" "0,rm")
-         (match_operand:DI 2 "nonimmediate_operand" "rm,0")))]
-  "TARGET_64BIT && TARGET_SSE4_1"
-  "@
-  pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
-  pinsrq\t{$0x0, %2, %0|%0, %2, 0x0}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
-
  (define_insn "vec_concatv2di"
    [(set (match_operand:V2DI 0 "register_operand"     "=Y2,?Y2,Y2,x,x,x")
         (vec_concat:V2DI
@@ -4884,12 +4894,31 @@
    [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
     (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
  
-(define_insn "*vec_concatv2di_rex64"
+(define_insn "*vec_concatv2di_rex64_sse4_1"
+  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,Yi,!x,x,x,x,x")
+       (vec_concat:V2DI
+         (match_operand:DI 1 "nonimmediate_operand" " 0,m,r ,*y,0,0,0,m")
+         (match_operand:DI 2 "vector_move_operand"  "rm,C,C ,C ,x,x,m,0")))]
+  "TARGET_64BIT && TARGET_SSE4_1"
+  "@
+   pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
+   movq\t{%1, %0|%0, %1}
+   movq\t{%1, %0|%0, %1}
+   movq2dq\t{%1, %0|%0, %1}
+   punpcklqdq\t{%2, %0|%0, %2}
+   movlhps\t{%2, %0|%0, %2}
+   movhps\t{%2, %0|%0, %2}
+   movlps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
+   (set_attr "prefix_extra" "1,*,*,*,*,*,*,*")
+   (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+
+(define_insn "*vec_concatv2di_rex64_sse"
    [(set (match_operand:V2DI 0 "register_operand"     "=Y2,Yi,!Y2,Y2,x,x,x")
         (vec_concat:V2DI
           (match_operand:DI 1 "nonimmediate_operand" "  m,r ,*y ,0 ,0,0,m")
           (match_operand:DI 2 "vector_move_operand"  "  C,C ,C  ,Y2,x,m,0")))]
-  "TARGET_64BIT"
+  "TARGET_64BIT && TARGET_SSE"
    "@
     movq\t{%1, %0|%0, %1}
     movq\t{%1, %0|%0, %1}
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index d8c7cae..4c08485 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,9 +1,14 @@
+2008-05-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+       * gcc.target/i386/sse-set-ps-1.c: New.
+       * gcc.target/i386/sse4_1-set-ps-1.c: Likewise.
+
  2008-05-15  Richard Guenther  <rguenther@suse.de>
  
         PR tree-optimization/36009
         PR tree-optimization/36204
         * gcc.dg/tree-ssa/ssa-lim-5.c: New testcase.
-       * gcc.dg/tree-ssa/ssa-lim-6.c: Likewise..
+       * gcc.dg/tree-ssa/ssa-lim-6.c: Likewise.
  
  2008-05-15  Richard Guenther  <rguenther@suse.de>
  
diff --git a/gcc/testsuite/gcc.target/i386/sse-set-ps-1.c b/gcc/testsuite/gcc.target/i386/sse-set-ps-1.c

new file mode 100644 (file)

index 0000000..8232c72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-set-ps-1.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse" } */
+
+#include "sse-check.h"
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include <xmmintrin.h>
+
+/* Build a vector from V[0..3] with _mm_set_ps and abort unless every
+   lane read back through the union equals the matching input.  */
+
+static void
+__attribute__((noinline))
+test (float *v)
+{
+  union
+    {
+      __m128 x;
+      float f[4];
+    } u;
+  unsigned int i;
+
+  u.x = _mm_set_ps (v[3], v[2], v[1], v[0]);
+
+  /* V is a pointer, so sizeof (v) is the pointer size, not the array
+     size; take the lane count from the union's array instead.  */
+  for (i = 0; i < sizeof (u.f) / sizeof (u.f[0]); i++)
+    if (v[i] != u.f[i])
+      {
+#ifdef DEBUG
+       printf ("%u: %f != %f\n", i, v[i], u.f[i]);
+#endif
+       abort ();
+      }
+}
+
+static void
+sse_test (void)
+{
+  float v[4] = { -3, 2, 1, 9 };
+  test (v);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-set-ps-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-set-ps-1.c

new file mode 100644 (file)

index 0000000..fe77d94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-set-ps-1.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#include "sse4_1-check.h"
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include <xmmintrin.h>
+
+/* Build a vector from V[0..3] with _mm_set_ps and abort unless every
+   lane read back through the union equals the matching input.  */
+
+static void
+__attribute__((noinline))
+test (float *v)
+{
+  union
+    {
+      __m128 x;
+      float f[4];
+    } u;
+  unsigned int i;
+
+  u.x = _mm_set_ps (v[3], v[2], v[1], v[0]);
+
+  /* V is a pointer, so sizeof (v) is the pointer size, not the array
+     size; take the lane count from the union's array instead.  */
+  for (i = 0; i < sizeof (u.f) / sizeof (u.f[0]); i++)
+    if (v[i] != u.f[i])
+      {
+#ifdef DEBUG
+       printf ("%u: %f != %f\n", i, v[i], u.f[i]);
+#endif
+       abort ();
+      }
+}
+
+static void
+sse4_1_test (void)
+{
+  float v[4] = { -3, 2, 1, 9 };
+  test (v);
+}
author	uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>
	Thu, 15 May 2008 10:17:45 +0000 (10:17 +0000)
committer	uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>
	Thu, 15 May 2008 10:17:45 +0000 (10:17 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/i386/sse.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/i386/sse-set-ps-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/sse4_1-set-ps-1.c	[new file with mode: 0644]	patch \| blob